{"id":"https://openalex.org/W4297964528","doi":"https://doi.org/10.1109/tnnls.2022.3207346","title":"Deep Reinforcement Learning: A Survey","display_name":"Deep Reinforcement Learning: A Survey","publication_year":2022,"publication_date":"2022-09-28","ids":{"openalex":"https://openalex.org/W4297964528","doi":"https://doi.org/10.1109/tnnls.2022.3207346","pmid":"https://pubmed.ncbi.nlm.nih.gov/36170386"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2022.3207346","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3207346","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100407927","display_name":"Xu Wang","orcid":"https://orcid.org/0000-0003-0935-6735"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xu Wang","raw_affiliation_strings":["Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100350760","display_name":"Sen Wang","orcid":"https://orcid.org/0000-0003-1537-8834"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sen Wang","raw_affiliation_strings":["Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080249769","display_name":"Xingxing Liang","orcid":"https://orcid.org/0000-0002-3594-2167"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingxing Liang","raw_affiliation_strings":["Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020197287","display_name":"Dawei Zhao","orcid":"https://orcid.org/0000-0002-1812-1316"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Zhao","raw_affiliation_strings":["National Innovation Institute of Defense Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Innovation Institute of Defense Technology, Beijing, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101842754","display_name":"Jincai Huang","orcid":"https://orcid.org/0000-0003-4515-1518"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jincai Huang","raw_affiliation_strings":["Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Information Systems Engineering Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053112608","display_name":"Xin Xu","orcid":"https://orcid.org/0000-0003-3238-745X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Xu","raw_affiliation_strings":["College of Intelligence Science, National University of Defense Technology (NUDT), Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science, National University of Defense Technology (NUDT), Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005995567","display_name":"Bin Dai","orcid":"https://orcid.org/0000-0001-9405-2626"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Dai","raw_affiliation_strings":["National Innovation Institute of Defense Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Innovation Institute of Defense Technology, Beijing, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":null,"display_name":"Qiguang Miao","orcid":"https://orcid.org/0000-0001-6766-8310"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiguang Miao","raw_affiliation_strings":["Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Key Laboratory of Big Data and Intelligent Vision, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100407927"],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":86.7547,"has_fulltext":false,"cited_by_count":710,"citation_normalized_percentile":{"value":0.99959411,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"35","issue":"4","first_page":"5064","last_page":"5078"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8291000127792358,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8291000127792358,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14413","display_name":"Advanced Technologies in Various Fields","score":0.766700029373169,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8945599794387817},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6159695982933044},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6054559946060181},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.4380570650100708},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40470433235168457},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.0719035267829895}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8945599794387817},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6159695982933044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6054559946060181},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.4380570650100708},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40470433235168457},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0719035267829895},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2022.3207346","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3207346","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:36170386","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36170386","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7699999809265137}],"awards":[{"id":"https://openalex.org/G2324311213","display_name":null,"funder_award_id":"61772396","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3157959067","display_name":null,"funder_award_id":"2018YFC0807500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4141414208","display_name":null,"funder_award_id":"2019M663640","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G4895336395","display_name":null,"funder_award_id":"61772392","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5196624177","display_name":null,"funder_award_id":"61825305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6041224194","display_name":null,"funder_award_id":"2020JQ-330","funder_id":"https://openalex.org/F4320324173","funder_display_name":"Natural Science Foundation of Shaanxi Province"},{"id":"https://openalex.org/G6461786173","display_name":null,"funder_award_id":"2020JM-195","funder_id":"https://openalex.org/F4320324173","funder_display_name":"Natural Science Foundation of Shaanxi Province"},{"id":"https://openalex.org/G8760221190","display_name":null,"funder_award_id":"61902296","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320324173","display_name":"Natural Science Foundation of Shaanxi Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":148,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W206679605","https://openalex.org/W1542941925","https://openalex.org/W1552830313","https://openalex.org/W1965555277","https://openalex.org/W1996579288","https://openalex.org/W1999874108","https://openalex.org/W2017957151","https://openalex.org/W2027197837","https://openalex.org/W2040870580","https://openalex.org/W2045031658","https://openalex.org/W2064675550","https://openalex.org/W2067707835","https://openalex.org/W2076337359","https://openalex.org/W2097778153","https://openalex.org/W2103496339","https://openalex.org/W2105482032","https://openalex.org/W2109910161","https://openalex.org/W2112796928","https://openalex.org/W2119717200","https://openalex.org/W2137983211","https://openalex.org/W2145339207","https://openalex.org/W2165698076","https://openalex.org/W2166302491","https://openalex.org/W2168359464","https://openalex.org/W2169498096","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2580909119","https://openalex.org/W2604382266","https://openalex.org/W2617547828","https://openalex.org/W2738318237","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2765302304","https://openalex.org/W2766447205","https://openalex.org/W2787894218","https://openalex.org/W2787933113","https://openalex.org/W2788862220","https://openalex.org/W2808421695","https://openalex.org/W2808492412","https://openalex.org/W2888442053","https://openalex.org/W2897475915","https://openalex.org/W2902572901","https://openalex.org/W2902907165","https://openalex.org/W2911087563","https://openalex.org/W2919115771","https://openalex.org/W2949369413","https://openalex.org/W2950635152","https://openalex.org/W2952194250","https://openalex.org/W2962954724","https://openalex.org/W2963177403","https://openalex.org/W2963428623","https://openalex.org/W2964227312","https://openalex.org/W2982316857","https://openalex.org/W2996896271","https://openalex.org/W3014101816","https://openalex.org/W3041202696","https://openalex.org/W3093426589","https://openalex.org/W3100366369","https://openalex.org/W3100789280","https://openalex.org/W3101780148","https://openalex.org/W3127756416","https://openalex.org/W3187174963","https://openalex.org/W3198350258","https://openalex.org/W4205947740","https://openalex.org/W4206916871","https://openalex.org/W4211221179","https://openalex.org/W4214717370","https://openalex.org/W4223604879","https://openalex.org/W4234228486","https://openalex.org/W4237591687","https://openalex.org/W4241521318","https://openalex.org/W4241996101","https://openalex.org/W4385245566","https://openalex.org/W6602057636","https://openalex.org/W6636510571","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6640174482","https://openalex.org/W6674600207","https://openalex.org/W6674884181","https://openalex.org/W6675341372","https://openalex.org/W6677067356","https://openalex.org/W6677939520","https://openalex.org/W6679257226","https://openalex.org/W6679700999","https://openalex.org/W6679958247","https://openalex.org/W6681451320","https://openalex.org/W6682132143","https://openalex.org/W6682137061","https://openalex.org/W6683107984","https://openalex.org/W6683204974","https://openalex.org/W6683300800","https://openalex.org/W6683603353","https://openalex.org/W6684191040","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6688533166","https://openalex.org/W6692846177","https://openalex.org/W6693969136","https://openalex.org/W6695011786","https://openalex.org/W6696324988","https://openalex.org/W6703271639","https://openalex.org/W6715102896","https://openalex.org/W6718092244","https://openalex.org/W6730111887","https://openalex.org/W6731187701","https://openalex.org/W6731259203","https://openalex.org/W6732665253","https://openalex.org/W6734206676","https://openalex.org/W6734215269","https://openalex.org/W6734517396","https://openalex.org/W6736495777","https://openalex.org/W6738796088","https://openalex.org/W6740092555","https://openalex.org/W6740471745","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6743802245","https://openalex.org/W6746530577","https://openalex.org/W6747473740","https://openalex.org/W6748270630","https://openalex.org/W6748600884","https://openalex.org/W6748603076","https://openalex.org/W6748839928","https://openalex.org/W6749304979","https://openalex.org/W6750185400","https://openalex.org/W6750393176","https://openalex.org/W6752380930","https://openalex.org/W6755103542","https://openalex.org/W6756463683","https://openalex.org/W6757184387","https://openalex.org/W6760405395","https://openalex.org/W6760698134","https://openalex.org/W6763704811","https://openalex.org/W6764053384","https://openalex.org/W6765092683","https://openalex.org/W6766456521","https://openalex.org/W6771217966","https://openalex.org/W6772005887","https://openalex.org/W6776438516","https://openalex.org/W6779265984","https://openalex.org/W6783144354","https://openalex.org/W6922480057","https://openalex.org/W7055423279"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347","https://openalex.org/W4210805261"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,17,110,115,119],"learning":[2,11,18,26,68,100,116],"(DRL)":[3],"integrates":[4],"the":[5,13,30,60,93,103,131,150],"feature":[6],"representation":[7],"ability":[8,15],"of":[9,16,64,95,106,109,130,140,162],"deep":[10,99],"with":[12,71],"decision-making":[14],"so":[19],"that":[20,41],"it":[21],"can":[22],"achieve":[23],"powerful":[24],"end-to-end":[25],"control":[27,69],"capabilities.":[28],"In":[29,97,142],"past":[31],"decade,":[32],"DRL":[33,148,155,163],"has":[34,101],"made":[35],"substantial":[36],"advances":[37,151],"in":[38,59,67,152],"many":[39,56,107],"tasks":[40,70],"require":[42],"perceiving":[43],"high-dimensional":[44],"input":[45],"and":[46,62,76,84,91,121,136,146,167],"making":[47],"optimal":[48],"or":[49],"near-optimal":[50],"decisions.":[51],"However,":[52],"there":[53],"are":[54,156,164],"still":[55],"challenging":[57],"problems":[58,90],"theory":[61],"applications":[63],"DRL,":[65],"especially":[66],"limited":[72],"samples,":[73],"sparse":[74],"rewards,":[75],"multiple":[77],"agents.":[78],"Researchers":[79],"have":[80],"proposed":[81],"various":[82],"solutions":[83],"new":[85],"theories":[86],"to":[87,144],"solve":[88],"these":[89],"promote":[92],"development":[94,105],"DRL.":[96,141],"addition,":[98],"stimulated":[102],"further":[104],"subfields":[108],"learning,":[111,120],"such":[112],"as":[113],"hierarchical":[114],"(HRL),":[117],"multiagent":[118],"imitation":[122],"learning.":[123],"This":[124],"article":[125],"gives":[126],"a":[127],"comprehensive":[128],"overview":[129],"fundamental":[132],"theories,":[133],"key":[134],"algorithms,":[135,149],"primary":[137],"research":[138,160],"domains":[139],"addition":[143],"value-based":[145],"policy-based":[147],"maximum":[153],"entropy-based":[154],"summarized.":[157],"The":[158],"future":[159],"topics":[161],"also":[165],"analyzed":[166],"discussed.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":84},{"year":2025,"cited_by_count":368},{"year":2024,"cited_by_count":180},{"year":2023,"cited_by_count":76},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
