{"id":"https://openalex.org/W4301715973","doi":"https://doi.org/10.1007/978-1-4899-7687-1_646","title":"Policy Gradient Methods","display_name":"Policy Gradient Methods","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W4301715973","doi":"https://doi.org/10.1007/978-1-4899-7687-1_646"},"language":"en","primary_location":{"id":"doi:10.1007/978-1-4899-7687-1_646","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-1-4899-7687-1_646","pdf_url":null,"source":{"id":"https://openalex.org/S4306509369","display_name":"Encyclopedia of Machine Learning and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Encyclopedia of Machine Learning and Data Mining","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I4210112925","display_name":"Max Planck Institute for Biological Cybernetics","ror":"https://ror.org/026nmvv73","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210112925"]},{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]},{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jan Peters","raw_affiliation_strings":["Department of Empirical Inference, Max-Planck Institute for Intelligent Systems, Spemannstr. 38, 72076, T\u00fcbingen, Germany","Intelligent Autonomous Systems, Computer Science Department, Technische Universit\u00e4t Darmstadt, Hochschulstr. 10, 64293, Darmstadt, Hessen, Germany","Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Empirical Inference, Max-Planck Institute for Intelligent Systems, Spemannstr. 38, 72076, T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I4210135521"]},{"raw_affiliation_string":"Intelligent Autonomous Systems, Computer Science Department, Technische Universit\u00e4t Darmstadt, Hochschulstr. 10, 64293, Darmstadt, Hessen, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"Max Planck Institute for Biological Cybernetics, T\u00fcbingen, Germany","institution_ids":["https://openalex.org/I4210112925"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112411386","display_name":"J. Andrew Bagnell","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Andrew Bagnell","raw_affiliation_strings":["Carnegie Mellon University, 5000 Forbes Avenue, 15213, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, 5000 Forbes Avenue, 15213, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5071367253"],"corresponding_institution_ids":["https://openalex.org/I31512782","https://openalex.org/I4210112925","https://openalex.org/I4210135521"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.69591329,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"982","last_page":"985"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.788550853729248},{"id":"https://openalex.org/keywords/gradient-method","display_name":"Gradient method","score":0.563093364238739},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5557658076286316},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5192027688026428},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5139378905296326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5123142004013062},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.48771223425865173},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4493289291858673},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4188348054885864},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3515814542770386},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2812654376029968},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2606077194213867},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.15063229203224182},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.114034503698349}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.788550853729248},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.563093364238739},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5557658076286316},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5192027688026428},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5139378905296326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5123142004013062},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.48771223425865173},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4493289291858673},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4188348054885864},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3515814542770386},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2812654376029968},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2606077194213867},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.15063229203224182},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.114034503698349},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-1-4899-7687-1_646","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-1-4899-7687-1_646","pdf_url":null,"source":{"id":"https://openalex.org/S4306509369","display_name":"Encyclopedia of Machine Learning and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Encyclopedia of Machine Learning and Data Mining","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Climate action","id":"https://metadata.un.org/sdg/13","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W203276351","https://openalex.org/W2046765929","https://openalex.org/W2125612430","https://openalex.org/W2610184409","https://openalex.org/W4239993559"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2386410636","https://openalex.org/W2025663273","https://openalex.org/W3038962357","https://openalex.org/W3099153698"],"abstract_inverted_index":null,"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
