{"id":"https://openalex.org/W2931696260","doi":"https://doi.org/10.3390/s19071547","title":"The Actor-Dueling-Critic Method for Reinforcement Learning","display_name":"The Actor-Dueling-Critic Method for Reinforcement Learning","publication_year":2019,"publication_date":"2019-03-30","ids":{"openalex":"https://openalex.org/W2931696260","doi":"https://doi.org/10.3390/s19071547","mag":"2931696260","pmid":"https://pubmed.ncbi.nlm.nih.gov/30935035"},"language":"en","primary_location":{"id":"doi:10.3390/s19071547","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19071547","pdf_url":"https://www.mdpi.com/1424-8220/19/7/1547/pdf?version=1553935407","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1424-8220/19/7/1547/pdf?version=1553935407","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048931545","display_name":"Menghao Wu","orcid":"https://orcid.org/0000-0002-1683-6449"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]},{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN","FI"],"is_corresponding":false,"raw_author_name":"Menghao Wu","raw_affiliation_strings":["College of Automation, Harbin Engineering University, Harbin 150001, China","Department of Computer Science, Aalto University, 02150 Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"College of Automation, Harbin Engineering University, Harbin 150001, China","institution_ids":["https://openalex.org/I151727225"]},{"raw_affiliation_string":"Department of Computer Science, Aalto University, 02150 Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016203857","display_name":"Yanbin Gao","orcid":"https://orcid.org/0000-0001-8297-0358"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanbin Gao","raw_affiliation_strings":["College of Automation, Harbin Engineering University, Harbin 150001, China"],"affiliations":[{"raw_affiliation_string":"College of Automation, Harbin Engineering University, Harbin 150001, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006624933","display_name":"Alexander Jung","orcid":"https://orcid.org/0000-0001-7538-0990"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Alexander Jung","raw_affiliation_strings":["Department of Computer Science, Aalto University, 02150 Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalto University, 02150 Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381999","display_name":"Qiang Zhang","orcid":"https://orcid.org/0000-0003-3776-9799"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Zhang","raw_affiliation_strings":["College of Automation, Harbin Engineering University, Harbin 150001, China"],"affiliations":[{"raw_affiliation_string":"College of Automation, Harbin Engineering University, Harbin 150001, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018551628","display_name":"Shitong Du","orcid":"https://orcid.org/0000-0002-7940-7844"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shitong Du","raw_affiliation_strings":["College of Automation, Harbin Engineering University, Harbin 150001, China"],"affiliations":[{"raw_affiliation_string":"College of Automation, Harbin Engineering University, Harbin 150001, China","institution_ids":["https://openalex.org/I151727225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016203857"],"corresponding_institution_ids":["https://openalex.org/I151727225"],"apc_list":{"value":2400,"currency":"CHF","value_usd":2598},"apc_paid":{"value":2400,"currency":"CHF","value_usd":2598},"fwci":1.3006,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.85302119,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"19","issue":"7","first_page":"1547","last_page":"1547"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9645000100135803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.889424204826355},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7607183456420898},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5683032870292664},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5651705265045166},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5441468954086304},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.533622682094574},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5210370421409607},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5186009407043457},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5030965209007263},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.46684345602989197},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4467692971229553},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44377273321151733},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41217541694641113},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2656668424606323},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10240224003791809},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07431110739707947}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.889424204826355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7607183456420898},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5683032870292664},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5651705265045166},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5441468954086304},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.533622682094574},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5210370421409607},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5186009407043457},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5030965209007263},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.46684345602989197},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4467692971229553},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44377273321151733},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41217541694641113},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2656668424606323},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10240224003791809},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07431110739707947},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012371","descriptor_name":"Robotics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012371","descriptor_name":"Robotics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012371","descriptor_name":"Robotics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":6,"locations":[{"id":"doi:10.3390/s19071547","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19071547","pdf_url":"https://www.mdpi.com/1424-8220/19/7/1547/pdf?version=1553935407","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},{"id":"pmid:30935035","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/30935035","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors (Basel, Switzerland)","raw_type":null},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/37789","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/e0d0f9d9-d4f8-41a9-b4fa-fa0d3935107c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401663","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},{"id":"pmh:oai:doaj.org/article:7e5f8223a866476f8da408c8387952ee","is_oa":true,"landing_page_url":"https://doaj.org/article/7e5f8223a866476f8da408c8387952ee","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors, Vol 19, Iss 7, p 1547 (2019)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1424-8220/19/7/1547/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/s19071547","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:6479875","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/6479875","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors (Basel)","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/s19071547","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19071547","pdf_url":"https://www.mdpi.com/1424-8220/19/7/1547/pdf?version=1553935407","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1426318481","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G1880386336","display_name":null,"funder_award_id":"China Scholarship Council (CSC)","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1979540081","display_name":null,"funder_award_id":"201706","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2300736770","display_name":null,"funder_award_id":"(CSC)","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G3104405933","display_name":null,"funder_award_id":"61803118","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5249334224","display_name":null,"funder_award_id":"61803","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8410698183","display_name":null,"funder_award_id":"201706680063","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G8589651859","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321394","display_name":"Aalto-Yliopisto","ror":"https://ror.org/020hwjq30"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2931696260.pdf","grobid_xml":"https://content.openalex.org/works/W2931696260.grobid-xml"},"referenced_works_count":72,"referenced_works":["https://openalex.org/W567721252","https://openalex.org/W1658008008","https://openalex.org/W1757796397","https://openalex.org/W1848002569","https://openalex.org/W1906772730","https://openalex.org/W1966086707","https://openalex.org/W1999156278","https://openalex.org/W2029143333","https://openalex.org/W2046376809","https://openalex.org/W2087751887","https://openalex.org/W2099471712","https://openalex.org/W2103120971","https://openalex.org/W2104733512","https://openalex.org/W2121863487","https://openalex.org/W2126909264","https://openalex.org/W2127107099","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2155027007","https://openalex.org/W2155968351","https://openalex.org/W2156737235","https://openalex.org/W2161361734","https://openalex.org/W2163605009","https://openalex.org/W2165150801","https://openalex.org/W2169115793","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2513734981","https://openalex.org/W2522340145","https://openalex.org/W2530887700","https://openalex.org/W2554120691","https://openalex.org/W2565555125","https://openalex.org/W2567015638","https://openalex.org/W2571460869","https://openalex.org/W2575705757","https://openalex.org/W2594103415","https://openalex.org/W2745868649","https://openalex.org/W2746553466","https://openalex.org/W2781585732","https://openalex.org/W2781726626","https://openalex.org/W2786303200","https://openalex.org/W2792404612","https://openalex.org/W2795910581","https://openalex.org/W2906798544","https://openalex.org/W2907537824","https://openalex.org/W2910427378","https://openalex.org/W2919115771","https://openalex.org/W2949561945","https://openalex.org/W2950471160","https://openalex.org/W2951527505","https://openalex.org/W2951799221","https://openalex.org/W2962709623","https://openalex.org/W2962887844","https://openalex.org/W2963019567","https://openalex.org/W2963403593","https://openalex.org/W2963428623","https://openalex.org/W2964043796","https://openalex.org/W2964077562","https://openalex.org/W2964161785","https://openalex.org/W3021208093","https://openalex.org/W3100789280","https://openalex.org/W4214717370","https://openalex.org/W4231109964","https://openalex.org/W6616173779","https://openalex.org/W6675999342","https://openalex.org/W6682137061","https://openalex.org/W6682849425","https://openalex.org/W6683195989","https://openalex.org/W6684205842","https://openalex.org/W6692405165","https://openalex.org/W6696324988"],"related_works":["https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W3099153698","https://openalex.org/W4297873223","https://openalex.org/W3009457412","https://openalex.org/W2350784623"],"abstract_inverted_index":{"Model-free":[0],"reinforcement":[1,22],"learning":[2,23],"is":[3,97,129,186],"a":[4,51,108,172],"powerful":[5],"and":[6,45,67,101,164,169,189],"efficient":[7],"machine-learning":[8],"paradigm":[9],"which":[10,88],"has":[11],"been":[12],"generally":[13],"used":[14],"in":[15,47,50,68,142,196],"the":[16,21,25,33,64,69,74,81,91,112,122,127,132,138,143,151,177,183,193],"robotic":[17],"control":[18,162],"domain.":[19],"In":[20],"setting,":[24],"value":[26,35,96],"function":[27],"method":[28,156,195],"learns":[29],"policies":[30],"by":[31,79,131],"maximizing":[32],"state-action":[34],"(<i>Q</i>":[36],"value),":[37],"but":[38],"it":[39,106,148],"suffers":[40],"from":[41],"inaccurate":[42],"<i>Q</i>":[43,114],"estimation":[44],"results":[46,181],"poor":[48],"performance":[49],"stochastic":[52],"environment.":[53],"To":[54],"mitigate":[55],"this":[56,119],"issue,":[57],"we":[58,72,104,136,170],"present":[59],"an":[60,165],"approach":[61,120,185],"based":[62],"on":[63,159],"actor-critic":[65],"framework,":[66],"critic":[70,144],"branch":[71,145],"modify":[73],"manner":[75],"of":[76,99],"estimating":[77],"<i>Q</i>-value":[78],"introducing":[80],"advantage":[82],"function,":[83],"such":[84],"as":[85,107,121],"dueling":[86,133,139],"network,":[87],"can":[89],"estimate":[90],"action-advantage":[92,95],"value.":[93,115],"The":[94,155,180],"independent":[98],"state":[100],"environment":[102,174],"noise,":[103],"use":[105],"fine-tuning":[109],"factor":[110],"to":[111,118,146,150,175],"estimated":[113],"We":[116],"refer":[117],"actor-dueling-critic":[123],"(ADC)":[124],"network":[125,140],"since":[126],"frame":[128],"inspired":[130],"network.":[134],"Furthermore,":[135],"redesign":[137],"part":[141],"make":[147],"adapt":[149],"continuous":[152],"action":[153],"space.":[154],"was":[157],"tested":[158],"gym":[160],"classic":[161],"environments":[163],"obstacle":[166],"avoidance":[167],"environment,":[168],"design":[171],"noise":[173,197],"test":[176],"training":[178],"stability.":[179],"indicate":[182],"ADC":[184],"more":[187],"stable":[188],"converges":[190],"faster":[191],"than":[192],"DDPG":[194],"environments.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":5}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
