{"id":"https://openalex.org/W7155650808","doi":"https://doi.org/10.1016/j.array.2026.100812","title":"A review of reinforcement learning: A tripartite framework of environment design, algorithmic innovation, and application scenarios","display_name":"A review of reinforcement learning: A tripartite framework of environment design, algorithmic innovation, and application scenarios","publication_year":2026,"publication_date":"2026-04-25","ids":{"openalex":"https://openalex.org/W7155650808","doi":"https://doi.org/10.1016/j.array.2026.100812"},"language":"en","primary_location":{"id":"doi:10.1016/j.array.2026.100812","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.array.2026.100812","pdf_url":null,"source":{"id":"https://openalex.org/S4210194039","display_name":"Array","issn_l":"2590-0056","issn":["2590-0056"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Array","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1016/j.array.2026.100812","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124122636","display_name":"Yingli Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingli Liu","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134618629","display_name":"Zheng Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Xiong","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080388353","display_name":"Ling Yang","orcid":"https://orcid.org/0000-0002-8385-3564"},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ling Yang","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China"],"raw_orcid":"https://orcid.org/0000-0002-8385-3564","affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China","institution_ids":["https://openalex.org/I10660446"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5134607928","display_name":"Tao Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I10660446","display_name":"Kunming University of Science and Technology","ror":"https://ror.org/00xyeez13","country_code":"CN","type":"education","lineage":["https://openalex.org/I10660446"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Shen","raw_affiliation_strings":["Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Engineering and Automation, Kunming University of Science and Technology, Kunming 650093, China","institution_ids":["https://openalex.org/I10660446"]},{"raw_affiliation_string":"Yunnan Key Laboratory of Computer Technologies Application, Kunming University of Science and Technology, Kunming 650500, China","institution_ids":["https://openalex.org/I10660446"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080388353","https://openalex.org/A5134607928"],"corresponding_institution_ids":["https://openalex.org/I10660446"],"apc_list":{"value":1350,"currency":"USD","value_usd":1350},"apc_paid":{"value":1350,"currency":"USD","value_usd":1350},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86208408,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"100812","last_page":"100812"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.44679999351501465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.44679999351501465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.17030000686645508,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.03720000013709068,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8672000169754028},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4341000020503998},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43389999866485596},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4296000003814697},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4237000048160553},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.391400009393692},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.36410000920295715},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.36340001225471497}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8672000169754028},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7010999917984009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5435000061988831},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47620001435279846},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43389999866485596},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.36410000920295715},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.36340001225471497},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.34360000491142273},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.2759999930858612},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.array.2026.100812","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.array.2026.100812","pdf_url":null,"source":{"id":"https://openalex.org/S4210194039","display_name":"Array","issn_l":"2590-0056","issn":["2590-0056"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Array","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.array.2026.100812","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.array.2026.100812","pdf_url":null,"source":{"id":"https://openalex.org/S4210194039","display_name":"Array","issn_l":"2590-0056","issn":["2590-0056"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Array","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4876011312007904}],"awards":[],"funders":[{"id":"https://openalex.org/F4320336602","display_name":"Major Science and Technology Projects in Yunnan Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":105,"referenced_works":["https://openalex.org/W1969483458","https://openalex.org/W2041106615","https://openalex.org/W2081596682","https://openalex.org/W2083958377","https://openalex.org/W2086369010","https://openalex.org/W2100557841","https://openalex.org/W2100677568","https://openalex.org/W2123151547","https://openalex.org/W2141712152","https://openalex.org/W2145339207","https://openalex.org/W2147750403","https://openalex.org/W2169528473","https://openalex.org/W2254533881","https://openalex.org/W2257979135","https://openalex.org/W2334782222","https://openalex.org/W2547104198","https://openalex.org/W2625517426","https://openalex.org/W2625874945","https://openalex.org/W2748828099","https://openalex.org/W2762709466","https://openalex.org/W2773987374","https://openalex.org/W2793528497","https://openalex.org/W2808863072","https://openalex.org/W2884008866","https://openalex.org/W2907400679","https://openalex.org/W2921628782","https://openalex.org/W2948652605","https://openalex.org/W2949986955","https://openalex.org/W2963855901","https://openalex.org/W2971893337","https://openalex.org/W2973950680","https://openalex.org/W2982316857","https://openalex.org/W2982984621","https://openalex.org/W2989847975","https://openalex.org/W2990022244","https://openalex.org/W3008110675","https://openalex.org/W3009593063","https://openalex.org/W3029795912","https://openalex.org/W3037631072","https://openalex.org/W3083167820","https://openalex.org/W3088310808","https://openalex.org/W3093615681","https://openalex.org/W3096306506","https://openalex.org/W3097328883","https://openalex.org/W3100789280","https://openalex.org/W3121342653","https://openalex.org/W3126468206","https://openalex.org/W3128870359","https://openalex.org/W3129946480","https://openalex.org/W3130441055","https://openalex.org/W3156926985","https://openalex.org/W3170730117","https://openalex.org/W3179318616","https://openalex.org/W3207692912","https://openalex.org/W3214346043","https://openalex.org/W4200580349","https://openalex.org/W4210719966","https://openalex.org/W4211183372","https://openalex.org/W4229056709","https://openalex.org/W4241811150","https://openalex.org/W4247446124","https://openalex.org/W4285041537","https://openalex.org/W4289641564","https://openalex.org/W4296558875","https://openalex.org/W4304203195","https://openalex.org/W4307821105","https://openalex.org/W4310238115","https://openalex.org/W4312837849","https://openalex.org/W4312850441","https://openalex.org/W4315640913","https://openalex.org/W4315647850","https://openalex.org/W4319074581","https://openalex.org/W4319084034","https://openalex.org/W4319988650","https://openalex.org/W4320919539","https://openalex.org/W4321463929","https://openalex.org/W4328007675","https://openalex.org/W4360825542","https://openalex.org/W4360930683","https://openalex.org/W4361292099","https://openalex.org/W4379977613","https://openalex.org/W4383220085","https://openalex.org/W4385302721","https://openalex.org/W4385383325","https://openalex.org/W4385392447","https://openalex.org/W4386453649","https://openalex.org/W4387910977","https://openalex.org/W4388019183","https://openalex.org/W4388801288","https://openalex.org/W4389443166","https://openalex.org/W4390202039","https://openalex.org/W4391164281","https://openalex.org/W4391317152","https://openalex.org/W4391609213","https://openalex.org/W4392470951","https://openalex.org/W4394581738","https://openalex.org/W4396552608","https://openalex.org/W4399095033","https://openalex.org/W4399443974","https://openalex.org/W4400591984","https://openalex.org/W4400951049","https://openalex.org/W4401106290","https://openalex.org/W4404491734","https://openalex.org/W4409377352","https://openalex.org/W4413786800"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,110,161,243],"is":[2],"gradually":[3],"shifting":[4],"from":[5,115,144],"a":[6,48,66,230],"research":[7],"paradigm":[8],"dominated":[9],"by":[10],"games":[11],"and":[12,22,34,59,76,107,112,124,142,156,168,173,180,193,198,202,212,219,234],"simulations":[13],"toward":[14],"real":[15,60,245,255],"complex":[16,246,256],"scenarios":[17],"with":[18,221],"high":[19,23],"safety":[20],"requirements":[21],"costs,":[24],"such":[25,164,188,207],"as":[26,127,129,165,189,208],"energy":[27,171],"systems,":[28,247],"industrial":[29,176],"control,":[30,178],"robotics,":[31],"medical":[32],"decision-making,":[33,191],"AI":[35,181],"assistants.":[36,182],"However,":[37],"existing":[38],"surveys":[39],"are":[40],"mostly":[41],"centered":[42],"on":[43,96,109],"algorithms":[44],"or":[45],"applications,":[46],"lacking":[47],"systematic":[49],"analysis":[50],"of":[51,71,86,100,159,241],"the":[52,83,90,98,116,136,145,150,238],"interactions":[53],"among":[54,138],"environment":[55,72,91],"modeling,":[56],"algorithm":[57,117],"selection,":[58],"deployment":[61],"constraints.":[62],"This":[63,225],"paper":[64,226],"proposes":[65],"ternary":[67],"collaborative":[68],"framework":[69],"composed":[70],"design,":[73],"algorithmic":[74],"innovation,":[75],"application":[77,146],"scenarios,":[78],"to":[79,228,250],"systematically":[80],"sort":[81],"out":[82],"development":[84],"path":[85],"reinforcement":[87,160,214,217,242],"learning.":[88],"From":[89],"design":[92,152],"dimension,":[93,118,147],"it":[94,119,148],"focuses":[95],"analyzing":[97],"influence":[99],"feature":[101],"distribution,":[102],"reward":[103],"mechanism,":[104],"dynamic":[105],"uncertainty,":[106],"scalability":[108],"stability":[111],"generalization":[113],"ability;":[114],"compares":[120],"value-based,":[121],"policy-based,":[122],"model-based,":[123],"Actor\u2013Critic":[125],"frameworks":[126],"well":[128],"representative":[130],"methods":[131],"in":[132,162,244,248,254],"recent":[133],"years,":[134],"emphasizing":[135],"trade-off":[137],"stability,":[139],"sample":[140],"efficiency,":[141],"deployability;":[143],"summarizes":[149,185],"common":[151],"patterns,":[153],"evaluation":[154],"metrics,":[155],"key":[157],"challenges":[158],"fields":[163],"games,":[166],"robotics":[167],"autonomous":[169],"driving,":[170],"systems":[172],"smart":[174],"grids,":[175],"process":[177],"healthcare,":[179],"It":[183],"further":[184],"cross-domain":[186],"problems":[187],"high-dimensional":[190],"sparse":[192],"delayed":[194],"feedback,":[195],"parameter":[196],"sensitivity,":[197],"long-term":[199],"credit":[200],"assignment,":[201],"discusses":[203],"potential":[204],"solution":[205],"directions":[206],"world":[209],"models,":[210],"safe":[211],"constrained":[213],"learning,":[215,218],"offline":[216],"integration":[220],"large":[222],"language":[223],"models.":[224],"aims":[227],"provide":[229],"unified":[231],"analytical":[232],"perspective":[233],"practical":[235],"reference":[236],"for":[237],"engineering":[239,252],"implementation":[240,253],"order":[249],"support":[251],"systems.":[257]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2026-04-26T00:00:00"}
