{"id":"https://openalex.org/W1980972971","doi":"https://doi.org/10.1109/adprl.2011.5967353","title":"Approximate reinforcement learning: An overview","display_name":"Approximate reinforcement learning: An overview","publication_year":2011,"publication_date":"2011-04-01","ids":{"openalex":"https://openalex.org/W1980972971","doi":"https://doi.org/10.1109/adprl.2011.5967353","mag":"1980972971"},"language":"en","primary_location":{"id":"doi:10.1109/adprl.2011.5967353","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2011.5967353","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://orbi.uliege.be/bitstream/2268/88933/1/adprl11_Busoniu_et_al.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058935509","display_name":"Lucian Bu\u015foniu","orcid":"https://orcid.org/0000-0001-8017-1296"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Lucian Busoniu","raw_affiliation_strings":["Delft Center of Systems & Control, Delft University of Technnology, Netherlands","Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands"],"affiliations":[{"raw_affiliation_string":"Delft Center of Systems & Control, Delft University of Technnology, Netherlands","institution_ids":["https://openalex.org/I98358874"]},{"raw_affiliation_string":"Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077011518","display_name":"Damien Ernst","orcid":"https://orcid.org/0000-0002-3035-8260"},"institutions":[{"id":"https://openalex.org/I157674565","display_name":"University of Li\u00e8ge","ror":"https://ror.org/00afp2z80","country_code":"BE","type":"education","lineage":["https://openalex.org/I157674565"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Damien Ernst","raw_affiliation_strings":["FRS-FNRS, Systems and Modeling Unit, University of Li\u00e8ge, Belgium","Research Associate of the FRS-FNRS; Systems and Modeling Unit, University of Li\u00e8ge, Belgium"],"affiliations":[{"raw_affiliation_string":"FRS-FNRS, Systems and Modeling Unit, University of Li\u00e8ge, Belgium","institution_ids":["https://openalex.org/I157674565"]},{"raw_affiliation_string":"Research Associate of the FRS-FNRS; Systems and Modeling Unit, University of Li\u00e8ge, Belgium","institution_ids":["https://openalex.org/I157674565"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026547410","display_name":"Bart De Schutter","orcid":"https://orcid.org/0000-0001-9867-6196"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Bart De Schutter","raw_affiliation_strings":["Delft Center of Systems & Control, Delft University of Technnology, Netherlands","Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands"],"affiliations":[{"raw_affiliation_string":"Delft Center of Systems & Control, Delft University of Technnology, Netherlands","institution_ids":["https://openalex.org/I98358874"]},{"raw_affiliation_string":"Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084264842","display_name":"Robert Babu\u0161ka","orcid":"https://orcid.org/0000-0001-9578-8598"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Robert Babuska","raw_affiliation_strings":["Delft Center of Systems & Control, Delft University of Technnology, Netherlands","Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands"],"affiliations":[{"raw_affiliation_string":"Delft Center of Systems & Control, Delft University of Technnology, Netherlands","institution_ids":["https://openalex.org/I98358874"]},{"raw_affiliation_string":"Delft Center for Systems & Control, Delft Univ. of Technology, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058935509"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":3.55,"has_fulltext":true,"cited_by_count":65,"citation_normalized_percentile":{"value":0.92780723,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8936630487442017},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7234156131744385},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.6746360063552856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.644935131072998},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5572413802146912},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5105768442153931},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4735640287399292},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.35095342993736267},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1489664614200592}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8936630487442017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7234156131744385},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.6746360063552856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.644935131072998},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5572413802146912},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5105768442153931},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4735640287399292},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.35095342993736267},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1489664614200592},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/adprl.2011.5967353","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2011.5967353","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},{"id":"pmh:oai:orbi.ulg.ac.be:2268/88933","is_oa":true,"landing_page_url":"https://orbi.uliege.be/handle/2268/88933","pdf_url":"https://orbi.uliege.be/bitstream/2268/88933/1/adprl11_Busoniu_et_al.pdf","source":{"id":"https://openalex.org/S4306400651","display_name":"Open Repository and Bibliography (University of Li\u00e8ge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I157674565","host_organization_name":"University of Li\u00e8ge","host_organization_lineage":["https://openalex.org/I157674565"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 2011 IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL-11) (2011-04); Proceedings of the 2011 IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL-11), Paris, France [FR], April 11-15, 2011","raw_type":"peer reviewed"}],"best_oa_location":{"id":"pmh:oai:orbi.ulg.ac.be:2268/88933","is_oa":true,"landing_page_url":"https://orbi.uliege.be/handle/2268/88933","pdf_url":"https://orbi.uliege.be/bitstream/2268/88933/1/adprl11_Busoniu_et_al.pdf","source":{"id":"https://openalex.org/S4306400651","display_name":"Open Repository and Bibliography (University of Li\u00e8ge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I157674565","host_organization_name":"University of Li\u00e8ge","host_organization_lineage":["https://openalex.org/I157674565"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 2011 IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL-11) (2011-04); Proceedings of the 2011 IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL-11), Paris, France [FR], April 11-15, 2011","raw_type":"peer reviewed"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1980972971.pdf","grobid_xml":"https://content.openalex.org/works/W1980972971.grobid-xml"},"referenced_works_count":123,"referenced_works":["https://openalex.org/W13294968","https://openalex.org/W32403112","https://openalex.org/W111165306","https://openalex.org/W166862392","https://openalex.org/W171197886","https://openalex.org/W1491843047","https://openalex.org/W1529558080","https://openalex.org/W1550698229","https://openalex.org/W1553228025","https://openalex.org/W1554366315","https://openalex.org/W1576452626","https://openalex.org/W1576580777","https://openalex.org/W1579026795","https://openalex.org/W1583155004","https://openalex.org/W1601081659","https://openalex.org/W1626155273","https://openalex.org/W1854776945","https://openalex.org/W1998172110","https://openalex.org/W2009303086","https://openalex.org/W2011233848","https://openalex.org/W2027968610","https://openalex.org/W2046513829","https://openalex.org/W2072931156","https://openalex.org/W2073384958","https://openalex.org/W2075268401","https://openalex.org/W2084202551","https://openalex.org/W2091565802","https://openalex.org/W2094387729","https://openalex.org/W2097451572","https://openalex.org/W2098432798","https://openalex.org/W2100857832","https://openalex.org/W2101539915","https://openalex.org/W2103581319","https://openalex.org/W2104335146","https://openalex.org/W2104753538","https://openalex.org/W2107726111","https://openalex.org/W2107977784","https://openalex.org/W2108051346","https://openalex.org/W2108596215","https://openalex.org/W2112264645","https://openalex.org/W2116339921","https://openalex.org/W2116532829","https://openalex.org/W2117355432","https://openalex.org/W2120346334","https://openalex.org/W2121863487","https://openalex.org/W2123979492","https://openalex.org/W2124144580","https://openalex.org/W2124175081","https://openalex.org/W2124477018","https://openalex.org/W2125510930","https://openalex.org/W2125612430","https://openalex.org/W2126685977","https://openalex.org/W2126892350","https://openalex.org/W2130801532","https://openalex.org/W2131490088","https://openalex.org/W2134289401","https://openalex.org/W2135721773","https://openalex.org/W2136655982","https://openalex.org/W2141559645","https://openalex.org/W2144357723","https://openalex.org/W2145519197","https://openalex.org/W2150923691","https://openalex.org/W2151416233","https://openalex.org/W2151661095","https://openalex.org/W2154761920","https://openalex.org/W2155027007","https://openalex.org/W2155067780","https://openalex.org/W2156974606","https://openalex.org/W2158738729","https://openalex.org/W2160284799","https://openalex.org/W2161795906","https://openalex.org/W2163302320","https://openalex.org/W2165060096","https://openalex.org/W2165304603","https://openalex.org/W2169209873","https://openalex.org/W2169982856","https://openalex.org/W2172968643","https://openalex.org/W2268509491","https://openalex.org/W2400458653","https://openalex.org/W2415292238","https://openalex.org/W2484957131","https://openalex.org/W2487144912","https://openalex.org/W2586680856","https://openalex.org/W2611866857","https://openalex.org/W2737668828","https://openalex.org/W2764902300","https://openalex.org/W2951143668","https://openalex.org/W2963705262","https://openalex.org/W3027095131","https://openalex.org/W3099235411","https://openalex.org/W3125826253","https://openalex.org/W3139377883","https://openalex.org/W4214717370","https://openalex.org/W4245296547","https://openalex.org/W4285719527","https://openalex.org/W4299401133","https://openalex.org/W6600556750","https://openalex.org/W6606719070","https://openalex.org/W6634483419","https://openalex.org/W6634556520","https://openalex.org/W6675735773","https://openalex.org/W6676024505","https://openalex.org/W6677036816","https://openalex.org/W6677193802","https://openalex.org/W6677549092","https://openalex.org/W6677737365","https://openalex.org/W6678114464","https://openalex.org/W6678573530","https://openalex.org/W6678900246","https://openalex.org/W6678925830","https://openalex.org/W6679257226","https://openalex.org/W6679703492","https://openalex.org/W6680528610","https://openalex.org/W6681206011","https://openalex.org/W6682074005","https://openalex.org/W6682443896","https://openalex.org/W6682499586","https://openalex.org/W6683204974","https://openalex.org/W6683356630","https://openalex.org/W6683841269","https://openalex.org/W6684022048","https://openalex.org/W6684126333","https://openalex.org/W6685331716"],"related_works":["https://openalex.org/W2163296013","https://openalex.org/W165915117","https://openalex.org/W2326995835","https://openalex.org/W2743859443","https://openalex.org/W2059402478","https://openalex.org/W2123347777","https://openalex.org/W4387804363","https://openalex.org/W2019547100","https://openalex.org/W2477150073","https://openalex.org/W2181415614"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"allows":[3],"agents":[4],"to":[5,8,48,124],"learn":[6],"how":[7],"optimally":[9],"interact":[10],"with":[11],"complex":[12],"environments.":[13],"Fueled":[14],"by":[15],"recent":[16],"advances":[17],"in":[18,26],"approximation-based":[19],"algorithms,":[20,105],"RL":[21,42],"has":[22],"obtained":[23],"impressive":[24],"successes":[25],"robotics,":[27],"artificial":[28],"intelligence,":[29],"control,":[30],"operations":[31],"research,":[32],"etc.":[33],"However,":[34],"the":[35,54,115,126],"scarcity":[36],"of":[37,118],"survey":[38],"papers":[39],"about":[40],"approximate":[41,69,84],"makes":[43],"it":[44],"difficult":[45],"for":[46,68],"newcomers":[47],"grasp":[49],"this":[50,63],"intricate":[51],"field.":[52],"With":[53],"present":[55],"overview,":[56],"we":[57],"take":[58],"a":[59],"step":[60],"toward":[61],"alleviating":[62],"situation.":[64],"We":[65,112],"review":[66],"methods":[67],"RL,":[70],"starting":[71],"from":[72],"their":[73],"dynamic":[74],"programming":[75],"roots":[76],"and":[77,89,103,109,120],"organizing":[78],"them":[79],"into":[80,96],"three":[81],"major":[82],"classes:":[83],"value":[85],"iteration,":[86,88],"policy":[87,90,106],"search.":[91],"Each":[92],"class":[93],"is":[94],"subdivided":[95],"representative":[97],"categories,":[98],"highlighting":[99],"among":[100],"others":[101],"offline":[102],"online":[104],"gradient":[107],"methods,":[108,119],"simulation-based":[110],"techniques.":[111],"also":[113],"compare":[114],"different":[116],"categories":[117],"outline":[121],"possible":[122],"ways":[123],"enhance":[125],"reviewed":[127],"algorithms.":[128]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
