{"id":"https://openalex.org/W3100789280","doi":"https://doi.org/10.1109/msp.2017.2743240","title":"Deep Reinforcement Learning: A Brief Survey","display_name":"Deep Reinforcement Learning: A Brief Survey","publication_year":2017,"publication_date":"2017-11-01","ids":{"openalex":"https://openalex.org/W3100789280","doi":"https://doi.org/10.1109/msp.2017.2743240","mag":"3100789280"},"language":"en","primary_location":{"id":"doi:10.1109/msp.2017.2743240","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2017.2743240","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065210301","display_name":"Kai Arulkumaran","orcid":"https://orcid.org/0000-0003-0459-892X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kai Arulkumaran","raw_affiliation_strings":["Department of Bioengineering"],"affiliations":[{"raw_affiliation_string":"Department of Bioengineering","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001763022","display_name":"Marc Peter Deisenroth","orcid":"https://orcid.org/0000-0003-1503-680X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marc Peter Deisenroth","raw_affiliation_strings":["Department of Computing, PROWLER.io"],"affiliations":[{"raw_affiliation_string":"Department of Computing, PROWLER.io","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066559387","display_name":"Miles Brundage","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Miles Brundage","raw_affiliation_strings":["Human and Social Dimensions of Science and Technology Department, Arizona State University","University of Oxford, Future of Humanity Institute"],"affiliations":[{"raw_affiliation_string":"Human and Social Dimensions of Science and Technology Department, Arizona State University","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"University of Oxford, Future of Humanity Institute","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027222571","display_name":"Anil A. Bharath","orcid":"https://orcid.org/0000-0001-8808-2714"},"institutions":[{"id":"https://openalex.org/I4210109553","display_name":"Institution of Engineering and Technology","ror":"https://ror.org/01xqg1464","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210109553"]},{"id":"https://openalex.org/I4210162661","display_name":"Cortexica (United Kingdom)","ror":"https://ror.org/03xdnx124","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210162661"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anil Anthony Bharath","raw_affiliation_strings":["Cortexica Vision Systems","Institution of Engineering and Technology"],"affiliations":[{"raw_affiliation_string":"Cortexica Vision Systems","institution_ids":["https://openalex.org/I4210162661"]},{"raw_affiliation_string":"Institution of Engineering and Technology","institution_ids":["https://openalex.org/I4210109553"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065210301"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":140.5237,"has_fulltext":false,"cited_by_count":4197,"citation_normalized_percentile":{"value":0.99972014,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"34","issue":"6","first_page":"26","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9342820048332214},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7229865789413452},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.717069149017334},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6893104910850525},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5011005401611328},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4366471469402313},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.42770177125930786},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.41639405488967896},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39761263132095337},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.35564112663269043},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0647149384021759}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9342820048332214},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7229865789413452},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.717069149017334},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6893104910850525},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5011005401611328},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4366471469402313},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.42770177125930786},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.41639405488967896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39761263132095337},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.35564112663269043},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0647149384021759},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/msp.2017.2743240","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2017.2743240","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10083557","is_oa":false,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10083557/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   IEEE Signal Processing Magazine , 34  (6)   pp. 26-38.   (2017)      ","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.4300000071525574,"display_name":"Partnerships for the goals"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":252,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W166862392","https://openalex.org/W172298727","https://openalex.org/W567721252","https://openalex.org/W779494576","https://openalex.org/W834081922","https://openalex.org/W1191599655","https://openalex.org/W1480527676","https://openalex.org/W1490862430","https://openalex.org/W1498436455","https://openalex.org/W1499669280","https://openalex.org/W1514535095","https://openalex.org/W1564755532","https://openalex.org/W1573503290","https://openalex.org/W1600437712","https://openalex.org/W1646152356","https://openalex.org/W1658008008","https://openalex.org/W1681299129","https://openalex.org/W1771410628","https://openalex.org/W1821462560","https://openalex.org/W1870822514","https://openalex.org/W1909320841","https://openalex.org/W1929981607","https://openalex.org/W1931877416","https://openalex.org/W1949804828","https://openalex.org/W1957496711","https://openalex.org/W1959608418","https://openalex.org/W1968962398","https://openalex.org/W1987150989","https://openalex.org/W2002196558","https://openalex.org/W2009303086","https://openalex.org/W2009551863","https://openalex.org/W2012587148","https://openalex.org/W2026659355","https://openalex.org/W2027968610","https://openalex.org/W2038794597","https://openalex.org/W2046765929","https://openalex.org/W2056354534","https://openalex.org/W2056653303","https://openalex.org/W2061562262","https://openalex.org/W2091565802","https://openalex.org/W2099471712","https://openalex.org/W2099618002","https://openalex.org/W2103581399","https://openalex.org/W2104733512","https://openalex.org/W2109910161","https://openalex.org/W2115211925","https://openalex.org/W2116001481","https://openalex.org/W2118688707","https://openalex.org/W2119717200","https://openalex.org/W2121092017","https://openalex.org/W2121103318","https://openalex.org/W2121863487","https://openalex.org/W2129670787","https://openalex.org/W2130801532","https://openalex.org/W2131600418","https://openalex.org/W2137376508","https://openalex.org/W2138243089","https://openalex.org/W2139053308","https://openalex.org/W2139418546","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2155968351","https://openalex.org/W2156737235","https://openalex.org/W2158782408","https://openalex.org/W2163922914","https://openalex.org/W2165150801","https://openalex.org/W2167224731","https://openalex.org/W2168231600","https://openalex.org/W2168359464","https://openalex.org/W2171278097","https://openalex.org/W2173564293","https://openalex.org/W2174364281","https://openalex.org/W2176263492","https://openalex.org/W2192203593","https://openalex.org/W2195446438","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2280163991","https://openalex.org/W2290354866","https://openalex.org/W2291973609","https://openalex.org/W2342662072","https://openalex.org/W2344556769","https://openalex.org/W2395575420","https://openalex.org/W2417089653","https://openalex.org/W2417786368","https://openalex.org/W2442341664","https://openalex.org/W2480004914","https://openalex.org/W2510924756","https://openalex.org/W2518713116","https://openalex.org/W2521274174","https://openalex.org/W2530944449","https://openalex.org/W2547416798","https://openalex.org/W2551887912","https://openalex.org/W2553303224","https://openalex.org/W2553347458","https://openalex.org/W2554984891","https://openalex.org/W2560678327","https://openalex.org/W2567015638","https://openalex.org/W2578206533","https://openalex.org/W2580175322","https://openalex.org/W2591957724","https://openalex.org/W2593044849","https://openalex.org/W2594794854","https://openalex.org/W2594829461","https://openalex.org/W2596367596","https://openalex.org/W2604283518","https://openalex.org/W2606047872","https://openalex.org/W2607198029","https://openalex.org/W2609650878","https://openalex.org/W2612610049","https://openalex.org/W2615790994","https://openalex.org/W2620671107","https://openalex.org/W2624780181","https://openalex.org/W2726187156","https://openalex.org/W2735995851","https://openalex.org/W2736601468","https://openalex.org/W2738675347","https://openalex.org/W2746553466","https://openalex.org/W2749807327","https://openalex.org/W2751973545","https://openalex.org/W2756196406","https://openalex.org/W2787259794","https://openalex.org/W2911296969","https://openalex.org/W2919115771","https://openalex.org/W2949267040","https://openalex.org/W2949475445","https://openalex.org/W2950471160","https://openalex.org/W2950560044","https://openalex.org/W2950872548","https://openalex.org/W2951507724","https://openalex.org/W2951527505","https://openalex.org/W2951799221","https://openalex.org/W2952509347","https://openalex.org/W2952629144","https://openalex.org/W2962730405","https://openalex.org/W2962736495","https://openalex.org/W2962764591","https://openalex.org/W2962841471","https://openalex.org/W2962872206","https://openalex.org/W2962887844","https://openalex.org/W2962897886","https://openalex.org/W2962938178","https://openalex.org/W2962986780","https://openalex.org/W2963000099","https://openalex.org/W2963038698","https://openalex.org/W2963095800","https://openalex.org/W2963170229","https://openalex.org/W2963184621","https://openalex.org/W2963262099","https://openalex.org/W2963276097","https://openalex.org/W2963277051","https://openalex.org/W2963293533","https://openalex.org/W2963305465","https://openalex.org/W2963430173","https://openalex.org/W2963477884","https://openalex.org/W2963523627","https://openalex.org/W2963634205","https://openalex.org/W2963639957","https://openalex.org/W2963674921","https://openalex.org/W2963830168","https://openalex.org/W2963864421","https://openalex.org/W2963871073","https://openalex.org/W2963938771","https://openalex.org/W2963948945","https://openalex.org/W2964006217","https://openalex.org/W2964043796","https://openalex.org/W2964118262","https://openalex.org/W2964161785","https://openalex.org/W2964164283","https://openalex.org/W2964227312","https://openalex.org/W2964250653","https://openalex.org/W2964338167","https://openalex.org/W2964352247","https://openalex.org/W3021208093","https://openalex.org/W3097999317","https://openalex.org/W3103559770","https://openalex.org/W3103780890","https://openalex.org/W3139377883","https://openalex.org/W4205513846","https://openalex.org/W4214717370","https://openalex.org/W4241521318","https://openalex.org/W4254755460","https://openalex.org/W4293396018","https://openalex.org/W4293862243","https://openalex.org/W4297732320","https://openalex.org/W4297795161","https://openalex.org/W4297804343","https://openalex.org/W4298174377","https://openalex.org/W4298876402","https://openalex.org/W4300971732","https://openalex.org/W4302570325","https://openalex.org/W4319988532","https://openalex.org/W4320013936","https://openalex.org/W4362203700","https://openalex.org/W4391602018","https://openalex.org/W4394652010","https://openalex.org/W4394666657","https://openalex.org/W4394672593","https://openalex.org/W6607097208","https://openalex.org/W6616173779","https://openalex.org/W6622487243","https://openalex.org/W6623316541","https://openalex.org/W6627932998","https://openalex.org/W6629277349","https://openalex.org/W6630875275","https://openalex.org/W6636881020","https://openalex.org/W6638018090","https://openalex.org/W6639732818","https://openalex.org/W6639949747","https://openalex.org/W6640443443","https://openalex.org/W6640963894","https://openalex.org/W6675999342","https://openalex.org/W6677477928","https://openalex.org/W6677939520","https://openalex.org/W6679257226","https://openalex.org/W6682849425","https://openalex.org/W6684205842","https://openalex.org/W6684338915","https://openalex.org/W6684921986","https://openalex.org/W6685200760","https://openalex.org/W6685444567","https://openalex.org/W6685757253","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6695011786","https://openalex.org/W6696324988","https://openalex.org/W6696783566","https://openalex.org/W6701573534","https://openalex.org/W6703271639","https://openalex.org/W6704647235","https://openalex.org/W6704665273","https://openalex.org/W6712181171","https://openalex.org/W6713411898","https://openalex.org/W6716474083","https://openalex.org/W6716653466","https://openalex.org/W6717230150","https://openalex.org/W6718092244","https://openalex.org/W6726754200","https://openalex.org/W6726913060","https://openalex.org/W6727349600","https://openalex.org/W6729556111","https://openalex.org/W6730111887","https://openalex.org/W6730127522","https://openalex.org/W6731334075","https://openalex.org/W6734215269","https://openalex.org/W6736803224","https://openalex.org/W6739416393","https://openalex.org/W6765378368","https://openalex.org/W6766234440","https://openalex.org/W6792155000"],"related_works":["https://openalex.org/W2116677773","https://openalex.org/W2155261584","https://openalex.org/W2584231425","https://openalex.org/W2150611273","https://openalex.org/W4306904969","https://openalex.org/W4207086172","https://openalex.org/W2042919702","https://openalex.org/W4225981436","https://openalex.org/W2156185805","https://openalex.org/W2770353918"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,35],"learning":[2,32,36,48],"(DRL)":[3],"is":[4,33],"poised":[5],"to":[6,38,40,49,61,68,87,95],"revolutionize":[7],"the":[8,27,76,88,96,114,132,155],"field":[9,90],"of":[10,26,91,99,135,152],"artificial":[11],"intelligence":[12],"(AI)":[13],"and":[14,101,123],"represents":[15],"a":[16,23],"step":[17],"toward":[18],"building":[19],"autonomous":[20],"systems":[21],"with":[22,84],"higher-level":[24],"understanding":[25,142],"visual":[28,141],"world.":[29,78],"Currently,":[30],"deep":[31,111,115,136],"enabling":[34],"(RL)":[37],"scale":[39],"problems":[41],"that":[42],"were":[43],"previously":[44],"intractable,":[45],"such":[46],"as":[47],"play":[50],"video":[51],"games":[52],"directly":[53,71],"from":[54,72],"pixels.":[55],"DRL":[56],"algorithms":[57,109],"are":[58],"also":[59],"applied":[60],"robotics,":[62],"allowing":[63],"control":[64],"policies":[65],"for":[66],"robots":[67],"be":[69],"learned":[70],"camera":[73],"inputs":[74],"in":[75,110],"real":[77],"In":[79,128],"this":[80],"survey,":[81],"we":[82,130,147],"begin":[83],"an":[85],"introduction":[86],"general":[89],"RL,":[92,112],"then":[93],"progress":[94],"main":[97],"streams":[98],"value-based":[100],"policy-based":[102],"methods.":[103],"Our":[104],"survey":[105],"will":[106],"cover":[107],"central":[108],"including":[113],"Q-network":[116],"(DQN),":[117],"trust":[118],"region":[119],"policy":[120],"optimization":[121],"(TRPO),":[122],"asynchronous":[124],"advantage":[125],"actor":[126],"critic.":[127],"parallel,":[129],"highlight":[131],"unique":[133],"advantages":[134],"neural":[137],"networks,":[138],"focusing":[139],"on":[140],"via":[143],"RL.":[144],"To":[145],"conclude,":[146],"describe":[148],"several":[149],"current":[150],"areas":[151],"research":[153],"within":[154],"field.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":150},{"year":2025,"cited_by_count":817},{"year":2024,"cited_by_count":770},{"year":2023,"cited_by_count":733},{"year":2022,"cited_by_count":559},{"year":2021,"cited_by_count":491},{"year":2020,"cited_by_count":361},{"year":2019,"cited_by_count":227},{"year":2018,"cited_by_count":87},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2020-11-23T00:00:00"}
