{"id":"https://openalex.org/W2103581399","doi":"https://doi.org/10.1093/jigpal/jzp049","title":"Recurrent policy gradients","display_name":"Recurrent policy gradients","publication_year":2009,"publication_date":"2009-09-09","ids":{"openalex":"https://openalex.org/W2103581399","doi":"https://doi.org/10.1093/jigpal/jzp049","mag":"2103581399"},"language":"fr","primary_location":{"id":"doi:10.1093/jigpal/jzp049","is_oa":false,"landing_page_url":"https://doi.org/10.1093/jigpal/jzp049","pdf_url":null,"source":{"id":"https://openalex.org/S2734381524","display_name":"Logic Journal of IGPL","issn_l":"1367-0751","issn":["1367-0751","1368-9894"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Logic Journal of IGPL","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://mediatum.ub.tum.de/node?id=1287506","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053990531","display_name":"Daan Wierstra","orcid":null},"institutions":[{"id":"https://openalex.org/I2614128279","display_name":"Dalle Molle Institute for Artificial Intelligence Research","ror":"https://ror.org/013355g38","country_code":"CH","type":"facility","lineage":["https://openalex.org/I15196421","https://openalex.org/I2614128279","https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"D. Wierstra","raw_affiliation_strings":["IDSIA, Manno\u2013Lugano, Switzerland.E-mail: daan@idsia.ch; alexander@idsia.ch","IDSIA, Manno\u2013Lugano, Switzerland.E-mail: ;"],"affiliations":[{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland.E-mail: daan@idsia.ch; alexander@idsia.ch","institution_ids":["https://openalex.org/I2614128279"]},{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland.E-mail: ;","institution_ids":["https://openalex.org/I2614128279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038027897","display_name":"Alexander F\u00f6rster","orcid":"https://orcid.org/0000-0003-0117-0135"},"institutions":[{"id":"https://openalex.org/I2614128279","display_name":"Dalle Molle Institute for Artificial Intelligence Research","ror":"https://ror.org/013355g38","country_code":"CH","type":"facility","lineage":["https://openalex.org/I15196421","https://openalex.org/I2614128279","https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"A. Forster","raw_affiliation_strings":["IDSIA, Manno\u2013Lugano, Switzerland.E-mail: daan@idsia.ch; alexander@idsia.ch","IDSIA, Manno\u2013Lugano, Switzerland.E-mail: ;"],"affiliations":[{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland.E-mail: daan@idsia.ch; alexander@idsia.ch","institution_ids":["https://openalex.org/I2614128279"]},{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland.E-mail: ;","institution_ids":["https://openalex.org/I2614128279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"funder","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I4210112925","display_name":"Max Planck Institute for Biological Cybernetics","ror":"https://ror.org/026nmvv73","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210112925"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"J. Peters","raw_affiliation_strings":["Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany.E-mail: mail@jan-peters.net","Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany.E-mail:"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany.E-mail: mail@jan-peters.net","institution_ids":["https://openalex.org/I4210112925"]},{"raw_affiliation_string":"Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany.E-mail:","institution_ids":["https://openalex.org/I149899117"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071172037","display_name":"J\u00fcrgen Schmidhuber","orcid":null},"institutions":[{"id":"https://openalex.org/I2614128279","display_name":"Dalle Molle Institute for Artificial Intelligence Research","ror":"https://ror.org/013355g38","country_code":"CH","type":"facility","lineage":["https://openalex.org/I15196421","https://openalex.org/I2614128279","https://openalex.org/I57201433"]},{"id":"https://openalex.org/I57201433","display_name":"Universit\u00e0 della Svizzera italiana","ror":"https://ror.org/03c4atk17","country_code":"CH","type":"education","lineage":["https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"J. Schmidhuber","raw_affiliation_strings":["IDSIA, Manno\u2013Lugano, Switzerland; TU M\u00fcnchen Institut f\u00fcr Informatik, Garching bei M\u00fcnchen, Germany; University of Lugano, Faculty of Informatics, Lugano, Switzerland.E-mail: juergen@idsia.ch","IDSIA, Manno\u2013Lugano, Switzerland; TU M\u00fcnchen Institut f\u00fcr Informatik, Garching bei M\u00fcnchen, Germany; University of Lugano, Faculty of Informatics, Lugano, Switzerland.E-mail:"],"affiliations":[{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland; TU M\u00fcnchen Institut f\u00fcr Informatik, Garching bei M\u00fcnchen, Germany; University of Lugano, Faculty of Informatics, Lugano, Switzerland.E-mail: juergen@idsia.ch","institution_ids":["https://openalex.org/I2614128279"]},{"raw_affiliation_string":"IDSIA, Manno\u2013Lugano, Switzerland; TU M\u00fcnchen Institut f\u00fcr Informatik, Garching bei M\u00fcnchen, Germany; University of Lugano, Faculty of Informatics, Lugano, Switzerland.E-mail:","institution_ids":["https://openalex.org/I57201433"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053990531"],"corresponding_institution_ids":["https://openalex.org/I2614128279"],"apc_list":{"value":4151,"currency":"USD","value_usd":4151},"apc_paid":null,"fwci":3.613,"has_fulltext":false,"cited_by_count":104,"citation_normalized_percentile":{"value":0.93174038,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"18","issue":"5","first_page":"620","last_page":"634"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.84821617603302},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8441139459609985},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.7768272161483765},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7533798217773438},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6206355094909668},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.594488263130188},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5645439624786377},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.563075065612793},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5470094680786133},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4724704921245575},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.41832417249679565},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4145486354827881},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4022407829761505},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3305119276046753},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0859849750995636}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84821617603302},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8441139459609985},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.7768272161483765},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7533798217773438},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6206355094909668},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.594488263130188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5645439624786377},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.563075065612793},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5470094680786133},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4724704921245575},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.41832417249679565},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4145486354827881},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4022407829761505},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3305119276046753},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0859849750995636},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":9,"locations":[{"id":"doi:10.1093/jigpal/jzp049","is_oa":false,"landing_page_url":"https://doi.org/10.1093/jigpal/jzp049","pdf_url":null,"source":{"id":"https://openalex.org/S2734381524","display_name":"Logic Journal of IGPL","issn_l":"1367-0751","issn":["1367-0751","1368-9894"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Logic Journal of IGPL","raw_type":"journal-article"},{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:55397","is_oa":false,"landing_page_url":"http://tubiblio.ulb.tu-darmstadt.de/view/person/Wierstra=3AD=2E=3A=3A.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"NonPeerReviewed"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.148.7345","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.148.7345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.idsia.ch/~daan/papers/jof.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.165.8275","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.8275","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.kyb.tuebingen.mpg.de/publications/attachments/jzp049v1_5879%5B0%5D.pdf","raw_type":"text"},{"id":"pmh:oai:doc.rero.ch:293283","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400148","display_name":"reroDoc Digital Library","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:edoc.mpg.de:461746","is_oa":false,"landing_page_url":"http://edoc.mpg.de/461746","pdf_url":null,"source":{"id":"https://openalex.org/S4406922265","display_name":"Max Planck Institute for Plasma Physics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Logic Journal of the IGPL, v.Epub ahead, 1-15 (2009)","raw_type":"Article"},{"id":"pmh:oai:edoc.mpg.de:548469","is_oa":false,"landing_page_url":"http://edoc.mpg.de/548469","pdf_url":null,"source":{"id":"https://openalex.org/S4406922265","display_name":"Max Planck Institute for Plasma Physics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Logic Journal of the IGPL, v.18, 620-634 (2010)","raw_type":"Article"},{"id":"pmh:oai:mediatum.ub.tum.de:node/1287506","is_oa":true,"landing_page_url":"http://mediatum.ub.tum.de/node?id=1287506","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:pure.mpg.de:item_1788480","is_oa":false,"landing_page_url":"http://hdl.handle.net/11858/00-001M-0000-0013-BDD0-6","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Logic Journal of the IGPL","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:mediatum.ub.tum.de:node/1287506","is_oa":true,"landing_page_url":"http://mediatum.ub.tum.de/node?id=1287506","pdf_url":null,"source":{"id":"https://openalex.org/S4377196330","display_name":"mediaTUM  (Technical University of Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[{"score":0.7300000190734863,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W185726777","https://openalex.org/W577502697","https://openalex.org/W1176136657","https://openalex.org/W1539216098","https://openalex.org/W1541084404","https://openalex.org/W1640774615","https://openalex.org/W1814308503","https://openalex.org/W1899504021","https://openalex.org/W1994923984","https://openalex.org/W2016589492","https://openalex.org/W2019210935","https://openalex.org/W2064675550","https://openalex.org/W2080759927","https://openalex.org/W2094149843","https://openalex.org/W2096533821","https://openalex.org/W2107878631","https://openalex.org/W2114537044","https://openalex.org/W2116850952","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2125612430","https://openalex.org/W2127107099","https://openalex.org/W2136602922","https://openalex.org/W2139053308","https://openalex.org/W2150355110","https://openalex.org/W2155027007","https://openalex.org/W2160143733","https://openalex.org/W2169015875","https://openalex.org/W2172968643","https://openalex.org/W2914656440","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2895097035","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4366280654","https://openalex.org/W3160167280","https://openalex.org/W3020853991","https://openalex.org/W4362706668","https://openalex.org/W4231621013","https://openalex.org/W3171021120","https://openalex.org/W3008318776"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,59,103],"for":[2,55,81,129],"partially":[3],"observable":[4],"Markov":[5],"decision":[6],"problems":[7,114],"(POMDPs)":[8],"is":[9,106],"a":[10,43,52,90,100,126,130,142],"challenge":[11],"as":[12,37],"it":[13],"requires":[14],"policies":[15,112],"with":[16,57],"an":[17],"internal":[18],"state.":[19],"Traditional":[20],"approaches":[21],"suffer":[22],"significantly":[23],"from":[24],"this":[25,86],"shortcoming":[26],"and":[27,42,63],"usually":[28],"make":[29],"strong":[30],"assumptions":[31],"on":[32,113,156],"the":[33,94],"problem":[34],"domain":[35],"such":[36],"perfect":[38],"system":[39],"models,":[40],"state-estimators":[41],"Markovian":[44],"hidden":[45,61],"system.":[46],"Recurrent":[47,95],"neural":[48,132],"networks":[49],"(RNNs)":[50],"offer":[51],"natural":[53],"framework":[54],"dealing":[56],"policy":[58,82,91,127],"using":[60,75,165],"state":[62],"require":[64,116],"only":[65],"few":[66],"limiting":[67],"assumptions.":[68],"As":[69],"they":[70,78],"can":[71],"be":[72],"trained":[73],"well":[74],"gradient":[76,83,92,128],"descent,":[77],"are":[79,149],"suited":[80],"approaches.":[84],"In":[85],"paper,":[87],"we":[88,148,162],"present":[89],"method,":[93],"Policy":[96],"Gradient":[97],"which":[98,115],"constitutes":[99],"model-free":[101],"reinforcement":[102],"method.":[104],"It":[105],"aimed":[107],"at":[108],"training":[109],"limited-memory":[110],"stochastic":[111,182],"long-term":[117],"memories":[118],"of":[119],"past":[120],"observations.":[121],"The":[122],"approach":[123,176],"involves":[124],"approximating":[125],"recurrent":[131],"network":[133],"by":[134],"backpropagating":[135],"return-weighted":[136],"characteristic":[137],"eligibilities":[138],"through":[139],"time.":[140],"Using":[141],"\u2018\u2018Long":[143],"Short-Term":[144],"Memory\u2019\u2019":[145],"RNN":[146],"architecture,":[147],"able":[150],"to":[151,177],"outperform":[152],"previous":[153],"RL":[154],"methods":[155],"three":[157],"important":[158],"benchmark":[159],"tasks.":[160],"Furthermore,":[161],"show":[163],"that":[164],"history-dependent":[166],"baselines":[167],"helps":[168],"reducing":[169],"estimation":[170],"variance":[171],"significantly,":[172],"thus":[173],"enabling":[174],"our":[175],"tackle":[178],"more":[179],"challenging,":[180],"highly":[181],"environments.":[183]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":14},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
