{"id":"https://openalex.org/W4416743863","doi":"https://doi.org/10.1007/s10462-025-11439-9","title":"Reinforcement learning for single-agent to multi-agent systems: from basic theory to industrial application progress, a survey","display_name":"Reinforcement learning for single-agent to multi-agent systems: from basic theory to industrial application progress, a survey","publication_year":2025,"publication_date":"2025-11-27","ids":{"openalex":"https://openalex.org/W4416743863","doi":"https://doi.org/10.1007/s10462-025-11439-9"},"language":"en","primary_location":{"id":"doi:10.1007/s10462-025-11439-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11439-9","pdf_url":null,"source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s10462-025-11439-9","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045879529","display_name":"Dehua Zhang","orcid":"https://orcid.org/0000-0001-8623-8439"},"institutions":[{"id":"https://openalex.org/I4210094772","display_name":"Henan University of Engineering","ror":"https://ror.org/007wym039","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210094772"]},{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dehua Zhang","raw_affiliation_strings":["School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China","institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036056174","display_name":"Qing\u2010Song Yuan","orcid":"https://orcid.org/0000-0002-9713-099X"},"institutions":[{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]},{"id":"https://openalex.org/I4210094772","display_name":"Henan University of Engineering","ror":"https://ror.org/007wym039","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210094772"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingsong Yuan","raw_affiliation_strings":["School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China","institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063657790","display_name":"Lei Meng","orcid":"https://orcid.org/0000-0001-6713-655X"},"institutions":[{"id":"https://openalex.org/I4210094772","display_name":"Henan University of Engineering","ror":"https://ror.org/007wym039","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210094772"]},{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Meng","raw_affiliation_strings":["School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China","institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029886558","display_name":"Ruixue Xia","orcid":"https://orcid.org/0009-0004-3347-9983"},"institutions":[{"id":"https://openalex.org/I4210094772","display_name":"Henan University of Engineering","ror":"https://ror.org/007wym039","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210094772"]},{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruixue Xia","raw_affiliation_strings":["School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China","institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112635815","display_name":"Wei Liu","orcid":"https://orcid.org/0000-0003-4738-3195"},"institutions":[{"id":"https://openalex.org/I4210110718","display_name":"Nanyang Normal University","ror":"https://ror.org/01f7yer47","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210110718"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Liu","raw_affiliation_strings":["School of Intelligent Manufacturing and Electrical Engineering (Collaborative Innovation Center of Intelligent Explosion-proof Equipment, Henan Province), Nanyang Normal University, Wolong Road, Nanyang, 473061, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Intelligent Manufacturing and Electrical Engineering (Collaborative Innovation Center of Intelligent Explosion-proof Equipment, Henan Province), Nanyang Normal University, Wolong Road, Nanyang, 473061, Henan, China","institution_ids":["https://openalex.org/I4210110718"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069540002","display_name":"Chunbin Qin","orcid":"https://orcid.org/0000-0002-8238-5922"},"institutions":[{"id":"https://openalex.org/I4210094772","display_name":"Henan University of Engineering","ror":"https://ror.org/007wym039","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210094772"]},{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunbin Qin","raw_affiliation_strings":["School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Henan University, North Section of Mingli Road, Zhengzhou, 450046, Henan, China","institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5045879529"],"corresponding_institution_ids":["https://openalex.org/I36152291","https://openalex.org/I4210094772"],"apc_list":{"value":2490,"currency":"EUR","value_usd":3090},"apc_paid":{"value":2490,"currency":"EUR","value_usd":3090},"fwci":54.0174,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.99852674,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"59","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6039999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6039999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.3172999918460846,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.005900000222027302,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8129000067710876},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4984999895095825},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4194999933242798},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.35850000381469727},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.34549999237060547},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.3303000032901764},{"id":"https://openalex.org/keywords/development","display_name":"Development (topology)","score":0.32519999146461487}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8129000067710876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7383999824523926},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5163000226020813},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4984999895095825},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4194999933242798},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.35339999198913574},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.34549999237060547},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C2776542497","wikidata":"https://www.wikidata.org/wiki/Q5266672","display_name":"Development (topology)","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C138020889","wikidata":"https://www.wikidata.org/wiki/Q2349659","display_name":"Collaborative learning","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2865000069141388},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C92393732","wikidata":"https://www.wikidata.org/wiki/Q1790374","display_name":"Learning theory","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10462-025-11439-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11439-9","pdf_url":null,"source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10462-025-11439-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11439-9","pdf_url":null,"source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":121,"referenced_works":["https://openalex.org/W1542941925","https://openalex.org/W1641379095","https://openalex.org/W1977234237","https://openalex.org/W1983320747","https://openalex.org/W2007995029","https://openalex.org/W2029670310","https://openalex.org/W2040870580","https://openalex.org/W2045031658","https://openalex.org/W2048687352","https://openalex.org/W2064853889","https://openalex.org/W2079359777","https://openalex.org/W2100677568","https://openalex.org/W2103151730","https://openalex.org/W2124477018","https://openalex.org/W2134672787","https://openalex.org/W2145339207","https://openalex.org/W2150593711","https://openalex.org/W2162569149","https://openalex.org/W2165396616","https://openalex.org/W2166559705","https://openalex.org/W2170302354","https://openalex.org/W2294798173","https://openalex.org/W2570494446","https://openalex.org/W2617547828","https://openalex.org/W2740924709","https://openalex.org/W2744293559","https://openalex.org/W2746553466","https://openalex.org/W2911964244","https://openalex.org/W2912471136","https://openalex.org/W2919912236","https://openalex.org/W2922729526","https://openalex.org/W2963317745","https://openalex.org/W2963658727","https://openalex.org/W2991046523","https://openalex.org/W2998049033","https://openalex.org/W3003902235","https://openalex.org/W3020899234","https://openalex.org/W3035141983","https://openalex.org/W3046723538","https://openalex.org/W3046760967","https://openalex.org/W3114263192","https://openalex.org/W3126468206","https://openalex.org/W3127561923","https://openalex.org/W3135757144","https://openalex.org/W3136319525","https://openalex.org/W3156829097","https://openalex.org/W3160974986","https://openalex.org/W3172247901","https://openalex.org/W3215884876","https://openalex.org/W4200120235","https://openalex.org/W4205835086","https://openalex.org/W4214717370","https://openalex.org/W4226033453","https://openalex.org/W4234438384","https://openalex.org/W4248966671","https://openalex.org/W4283789768","https://openalex.org/W4285151647","https://openalex.org/W4287322212","https://openalex.org/W4296706038","https://openalex.org/W4299689471","https://openalex.org/W4312479610","https://openalex.org/W4313000156","https://openalex.org/W4318605497","https://openalex.org/W4322576809","https://openalex.org/W4322731693","https://openalex.org/W4328027559","https://openalex.org/W4360584316","https://openalex.org/W4365129697","https://openalex.org/W4376274075","https://openalex.org/W4377145631","https://openalex.org/W4377700913","https://openalex.org/W4383112908","https://openalex.org/W4384819582","https://openalex.org/W4384833660","https://openalex.org/W4385194665","https://openalex.org/W4386362933","https://openalex.org/W4386453517","https://openalex.org/W4386472868","https://openalex.org/W4386820616","https://openalex.org/W4386824832","https://openalex.org/W4388469888","https://openalex.org/W4388483810","https://openalex.org/W4388579564","https://openalex.org/W4388579704","https://openalex.org/W4389337872","https://openalex.org/W4389542260","https://openalex.org/W4390075309","https://openalex.org/W4391248687","https://openalex.org/W4391407188","https://openalex.org/W4391409140","https://openalex.org/W4391807011","https://openalex.org/W4392405524","https://openalex.org/W4393207142","https://openalex.org/W4393380911","https://openalex.org/W4393972724","https://openalex.org/W4394595614","https://openalex.org/W4394841726","https://openalex.org/W4396753487","https://openalex.org/W4396853590","https://openalex.org/W4396941569","https://openalex.org/W4398249695","https://openalex.org/W4399375293","https://openalex.org/W4399618329","https://openalex.org/W4399728346","https://openalex.org/W4399939533","https://openalex.org/W4399994360","https://openalex.org/W4400678942","https://openalex.org/W4400678988","https://openalex.org/W4400857405","https://openalex.org/W4401608620","https://openalex.org/W4402624667","https://openalex.org/W4403022518","https://openalex.org/W4405489613","https://openalex.org/W4405599777","https://openalex.org/W4406258849","https://openalex.org/W4407373313","https://openalex.org/W4408399593","https://openalex.org/W4409333566","https://openalex.org/W4410660754","https://openalex.org/W4410721624","https://openalex.org/W4411408298"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,58,63,85,166],"(RL),":[2],"as":[3,109,237,239],"an":[4,172],"emerging":[5],"interdisciplinary":[6],"field":[7,37],"formed":[8],"by":[9,26],"the":[10,36,45,52,68,77,93,101,110,123,127,190,194,200,227,240],"integration":[11],"of":[12,38,48,72,96,107,126,193,205,229,242],"artificial":[13,27],"intelligence":[14,28],"and":[15,29,60,70,79,83,90,104,113,132,149,154,167,197,202,224,232],"control":[16,231],"science,":[17],"is":[18],"currently":[19],"demonstrating":[20],"a":[21,32,185,217],"cross-disciplinary":[22],"development":[23,46,71,228],"trend":[24],"led":[25],"has":[30],"become":[31],"research":[33],"hotspot":[34],"in":[35,87,129,207,221],"optimal":[39],"control.":[40],"This":[41,181,212],"paper":[42,139,213],"systematically":[43],"reviews":[44],"context":[47],"RL,":[49,73],"focusing":[50],"on":[51,76],"intrinsic":[53],"connection":[54],"between":[55,81],"single-agent":[56,177],"reinforcement":[57,62],"(SARL)":[59],"multi-agent":[61,116,179],"(MARL).":[64],"Firstly,":[65],"starting":[66],"from":[67,176],"formation":[69],"it":[74,121],"elaborates":[75],"similarities":[78],"differences":[80],"RL":[82],"other":[84],"paradigms":[86],"machine":[88],"learning,":[89,164,169],"briefly":[91],"introduces":[92],"main":[94],"branches":[95],"current":[97],"RL.":[98],"Then,":[99],"with":[100],"basic":[102,111],"knowledge":[103],"core":[105],"ideas":[106,204],"SARL":[108,141],"framework,":[112],"expanding":[114],"to":[115,178,215,225],"system":[117],"(MAS)":[118],"collaborative":[119,168],"control,":[120],"explores":[122],"coherence":[124],"characteristics":[125],"two":[128,195],"theoretical":[130],"frameworks":[131],"algorithm":[133,173],"design.":[134],"On":[135],"this":[136,138,222],"basis,":[137],"reconfigures":[140],"algorithms":[142,157],"into":[143,158],"dynamic":[144],"programming,":[145],"value":[146],"function":[147],"decomposition":[148],"policy":[150],"gradient":[151],"(PG)":[152],"type,":[153],"abstracts":[155],"MARL":[156,206],"four":[159],"paradigms:":[160],"behavior":[161],"analysis,":[162],"centralized":[163],"communication":[165],"thus":[170],"establishing":[171],"mapping":[174],"relationship":[175],"scenarios.":[180],"innovative":[182],"framework":[183],"provides":[184],"new":[186],"perspective":[187],"for":[188,219,235],"understanding":[189],"evolutionary":[191],"correlation":[192],"methods,":[196],"also":[198],"discusses":[199],"challenges":[201],"solution":[203],"solving":[208],"large-scale":[209],"MAS":[210,236],"problems.":[211],"aims":[214],"provide":[216],"reference":[218],"researchers":[220],"field,":[223],"promote":[226],"cooperative":[230],"optimization":[233],"methods":[234],"well":[238],"advancement":[241],"related":[243],"application":[244],"research.":[245]},"counts_by_year":[{"year":2026,"cited_by_count":25},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-11-28T00:00:00"}
