{"id":"https://openalex.org/W1967459934","doi":"https://doi.org/10.1162/neco.2009.12-08-922","title":"Derivatives of Logarithmic Stationary Distributions for Policy Gradient Reinforcement Learning","display_name":"Derivatives of Logarithmic Stationary Distributions for Policy Gradient Reinforcement Learning","publication_year":2009,"publication_date":"2009-10-20","ids":{"openalex":"https://openalex.org/W1967459934","doi":"https://doi.org/10.1162/neco.2009.12-08-922","mag":"1967459934","pmid":"https://pubmed.ncbi.nlm.nih.gov/19842990"},"language":"en","primary_location":{"id":"doi:10.1162/neco.2009.12-08-922","is_oa":false,"landing_page_url":"https://doi.org/10.1162/neco.2009.12-08-922","pdf_url":null,"source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/11858/00-001M-0000-0013-C12A-1","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089173802","display_name":"Tetsuro Morimura","orcid":"https://orcid.org/0009-0002-9711-8023"},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Tetsuro Morimura","raw_affiliation_strings":["IBM Research \u2013 Tokyo, Yamato, Kanagawa 242-8502, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research \u2013 Tokyo, Yamato, Kanagawa 242-8502, Japan","institution_ids":["https://openalex.org/I4210145865"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Eiji Uchibe","raw_affiliation_strings":["Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071281836","display_name":"Junichiro Yoshimoto","orcid":"https://orcid.org/0000-0001-7995-0321"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Junichiro Yoshimoto","raw_affiliation_strings":["Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan, and Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan, and Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan","institution_ids":["https://openalex.org/I75917431","https://openalex.org/I142637625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I4210112925","display_name":"Max Planck Institute for Biological Cybernetics","ror":"https://ror.org/026nmvv73","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210112925"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jan Peters","raw_affiliation_strings":["Max Planck Institute for Biological Cybernetics, 72076, T\u00fcbingen, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Biological Cybernetics, 72076, T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I4210112925"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004840638","display_name":"Kenji Doya","orcid":"https://orcid.org/0000-0002-2446-6820"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kenji Doya","raw_affiliation_strings":["Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan; Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan; and ATR Computational Neuroscience Laboratories, Soraku, Kyoto 619-0288, Japan","Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan","Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan; Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan; and ATR Computational Neuroscience Laboratories, Soraku, Kyoto 619-0288, Japan","institution_ids":["https://openalex.org/I75917431","https://openalex.org/I142637625"]},{"raw_affiliation_string":"Nara Institute of Science and Technology, Ikoma, Nara 630-0192, Japan","institution_ids":["https://openalex.org/I75917431"]},{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Uruma, Okinawa 904-2234, Japan","institution_ids":["https://openalex.org/I142637625"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004840638","https://openalex.org/A5031054137","https://openalex.org/A5071281836","https://openalex.org/A5071367253","https://openalex.org/A5089173802"],"corresponding_institution_ids":["https://openalex.org/I142637625","https://openalex.org/I4210112925","https://openalex.org/I4210145865","https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":1.3583,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.83956625,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"22","issue":"2","first_page":"342","last_page":"376"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.964900016784668,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7172173261642456},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.5768474340438843},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.4708746671676636},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4422394037246704},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4150255024433136},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4141137897968292},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.3915168046951294},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.336246132850647},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3361099362373352},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.26267170906066895},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.22634229063987732},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.21075710654258728},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07095929980278015}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7172173261642456},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.5768474340438843},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4708746671676636},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4422394037246704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4150255024433136},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4141137897968292},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.3915168046951294},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.336246132850647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3361099362373352},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26267170906066895},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.22634229063987732},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.21075710654258728},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07095929980278015}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008962","descriptor_name":"Models, Theoretical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D055641","descriptor_name":"Mathematical Concepts","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D055641","descriptor_name":"Mathematical Concepts","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D055641","descriptor_name":"Mathematical Concepts","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":7,"locations":[{"id":"doi:10.1162/neco.2009.12-08-922","is_oa":false,"landing_page_url":"https://doi.org/10.1162/neco.2009.12-08-922","pdf_url":null,"source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},{"id":"pmid:19842990","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/19842990","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural computation","raw_type":null},{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:55395","is_oa":false,"landing_page_url":"http://tubiblio.ulb.tu-darmstadt.de/view/person/Morimura=3AT=2E=3A=3A.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"NonPeerReviewed"},{"id":"pmh:oai:pure.mpg.de:item_1788909","is_oa":true,"landing_page_url":"https://hdl.handle.net/11858/00-001M-0000-0013-C12A-1","pdf_url":"http://hdl.handle.net/11858/00-001M-0000-0013-C12A-1","source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Neural computation","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.180.3577","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.180.3577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.kyb.tuebingen.mpg.de/publications/attachments/LSD_revise_ver3_5904%5B0%5D.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.911.944","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.911.944","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.researchgate.net/profile/Junichiro_Yoshimoto/publication/38023985_Derivatives_of_logarithmic_stationary_distributions_for_policy_gradient_reinforcement_learning/links/0046353ad772413072000000.pdf","raw_type":"text"},{"id":"pmh:oai:edoc.mpg.de:548419","is_oa":false,"landing_page_url":"http://edoc.mpg.de/548419","pdf_url":null,"source":{"id":"https://openalex.org/S4406922265","display_name":"Max Planck Institute for Plasma Physics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Neural Computation, v.22, 342-376 (2010)","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:pure.mpg.de:item_1788909","is_oa":true,"landing_page_url":"https://hdl.handle.net/11858/00-001M-0000-0013-C12A-1","pdf_url":"http://hdl.handle.net/11858/00-001M-0000-0013-C12A-1","source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Neural computation","raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1967459934.pdf","grobid_xml":"https://content.openalex.org/works/W1967459934.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W203117010","https://openalex.org/W562332459","https://openalex.org/W1176136657","https://openalex.org/W1515851193","https://openalex.org/W1521427077","https://openalex.org/W1541084404","https://openalex.org/W1553320709","https://openalex.org/W1570704007","https://openalex.org/W1576452626","https://openalex.org/W1606011487","https://openalex.org/W1607198972","https://openalex.org/W1814308503","https://openalex.org/W2009303086","https://openalex.org/W2046765929","https://openalex.org/W2072931156","https://openalex.org/W2073583350","https://openalex.org/W2080759927","https://openalex.org/W2093638026","https://openalex.org/W2098432798","https://openalex.org/W2099196192","https://openalex.org/W2100677568","https://openalex.org/W2103198983","https://openalex.org/W2107726111","https://openalex.org/W2108682071","https://openalex.org/W2113501460","https://openalex.org/W2114537044","https://openalex.org/W2119717200","https://openalex.org/W2127107099","https://openalex.org/W2132351269","https://openalex.org/W2137267792","https://openalex.org/W2144446635","https://openalex.org/W2149418961","https://openalex.org/W2155027007","https://openalex.org/W2158191646","https://openalex.org/W2162401674","https://openalex.org/W2164056559","https://openalex.org/W2165418472","https://openalex.org/W2172968643","https://openalex.org/W2173945562","https://openalex.org/W2285432433","https://openalex.org/W2492794003","https://openalex.org/W2610686804","https://openalex.org/W2911283634","https://openalex.org/W2914656440","https://openalex.org/W2990138404","https://openalex.org/W3041202696","https://openalex.org/W3103182070","https://openalex.org/W3140968660","https://openalex.org/W4205326910","https://openalex.org/W4212774754","https://openalex.org/W4242606736","https://openalex.org/W4246808543","https://openalex.org/W4285719527","https://openalex.org/W4301630257"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Most":[0],"conventional":[1],"policy":[2,27,51,131],"gradient":[3,22,132,146],"reinforcement":[4],"learning":[5,127],"(PGRL)":[6],"algorithms":[7,77,172],"neglect":[8],"(or":[9],"do":[10,78],"not":[11,79],"explicitly":[12],"make":[13],"use":[14],"of)":[15],"a":[16,95,107,124],"term":[17,30],"in":[18,49,141],"the":[19,26,32,35,42,50,54,65,70,99,111,114,143,161,167,170,183],"average":[20,144],"reward":[21,145],"with":[23,135],"respect":[24],"to":[25,41,47,74,82,159],"parameter.":[28,52],"That":[29],"involves":[31],"derivative":[33,104,112],"of":[34,44,110,113,169,185],"stationary":[36,101,115],"state":[37,102,116],"distribution":[38,46,103,117],"that":[39,179],"corresponds":[40],"sensitivity":[43],"its":[45],"changes":[48],"Although":[53],"bias":[55],"introduced":[56],"by":[57,63,150],"this":[58,91],"omission":[59],"can":[60,147,181],"be":[61,83,148],"reduced":[62],"setting":[64,151],"forgetting":[66],"rate":[67],"gamma":[68,81,87,152],"for":[69,97],"value":[71,162],"functions":[72],"close":[73],"1,":[75],"these":[76,180],"permit":[80],"set":[84],"exactly":[85],"at":[86],"=":[88,153],"1.":[89],"In":[90],"article,":[92],"we":[93],"propose":[94],"method":[96],"estimating":[98],"log":[100],"(LSD)":[105],"as":[106],"useful":[108],"form":[109],"through":[118],"backward":[119],"Markov":[120],"chain":[121],"formulation":[122],"and":[123,177],"temporal":[125],"difference":[126],"framework.":[128],"A":[129],"new":[130],"(PG)":[133],"framework":[134],"an":[136],"LSD":[137],"is":[138],"also":[139,165],"proposed,":[140],"which":[142],"estimated":[149],"0,":[154],"so":[155],"it":[156],"becomes":[157],"unnecessary":[158],"learn":[160],"functions.":[163],"We":[164],"test":[166],"performance":[168],"proposed":[171],"using":[173],"simple":[174],"benchmark":[175],"tasks":[176],"show":[178],"improve":[182],"performances":[184],"existing":[186],"PG":[187],"methods.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
