{"id":"https://openalex.org/W3133357144","doi":"https://doi.org/10.1109/tnnls.2021.3055761","title":"Optimal Tracking Control of Nonlinear Multiagent Systems Using Internal Reinforce Q-Learning","display_name":"Optimal Tracking Control of Nonlinear Multiagent Systems Using Internal Reinforce Q-Learning","publication_year":2021,"publication_date":"2021-02-16","ids":{"openalex":"https://openalex.org/W3133357144","doi":"https://doi.org/10.1109/tnnls.2021.3055761","mag":"3133357144","pmid":"https://pubmed.ncbi.nlm.nih.gov/33587710"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3055761","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3055761","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017639281","display_name":"Zhinan Peng","orcid":"https://orcid.org/0000-0003-2239-4920"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhinan Peng","raw_affiliation_strings":["School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011381205","display_name":"Rui Luo","orcid":"https://orcid.org/0000-0002-3571-7461"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Luo","raw_affiliation_strings":["School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022155749","display_name":"Jiangping Hu","orcid":"https://orcid.org/0000-0002-7559-8604"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangping Hu","raw_affiliation_strings":["School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013941056","display_name":"Kaibo Shi","orcid":"https://orcid.org/0000-0002-9863-9229"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaibo Shi","raw_affiliation_strings":["School of Information Science and Engineering, Chengdu University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Chengdu University, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114377928","display_name":"Sing Kiong Nguang","orcid":"https://orcid.org/0000-0003-4527-0082"},"institutions":[{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Sing Kiong Nguang","raw_affiliation_strings":["Department of Electrical, Computer and Software Engineering, The University of Auckland, Auckland, New Zealand"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Computer and Software Engineering, The University of Auckland, Auckland, New Zealand","institution_ids":["https://openalex.org/I154130895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067851724","display_name":"Bijoy K. Ghosh","orcid":null},"institutions":[{"id":"https://openalex.org/I12315562","display_name":"Texas Tech University","ror":"https://ror.org/0405mnx93","country_code":"US","type":"education","lineage":["https://openalex.org/I12315562"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bijoy Kumar Ghosh","raw_affiliation_strings":["Department of Mathematics and Statistics, Texas Tech University, Lubbock, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Statistics, Texas Tech University, Lubbock, TX, USA","institution_ids":["https://openalex.org/I12315562"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5017639281"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":12.8042,"has_fulltext":false,"cited_by_count":108,"citation_normalized_percentile":{"value":0.99001716,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"33","issue":"8","first_page":"4043","last_page":"4055"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12047","display_name":"Viral Infections and Vectors","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7848432064056396},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.699858546257019},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6541110277175903},{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative learning control","score":0.6033737063407898},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.5678170323371887},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5661953687667847},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.564685583114624},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5258888602256775},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5174437165260315},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5100010633468628},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.491115540266037},{"id":"https://openalex.org/keywords/internal-model","display_name":"Internal model","score":0.49061957001686096},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.48797911405563354},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.48593607544898987},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.48531216382980347},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.44118767976760864},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.41487592458724976},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3119359016418457},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2163390815258026},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19879651069641113}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7848432064056396},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.699858546257019},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6541110277175903},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.6033737063407898},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.5678170323371887},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5661953687667847},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.564685583114624},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5258888602256775},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5174437165260315},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5100010633468628},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.491115540266037},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.49061957001686096},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.48797911405563354},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.48593607544898987},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.48531216382980347},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.44118767976760864},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.41487592458724976},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3119359016418457},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2163390815258026},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19879651069641113},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2021.3055761","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3055761","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:33587710","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33587710","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2009544653","display_name":null,"funder_award_id":"61473061","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2640660485","display_name":null,"funder_award_id":"61703060","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3413405091","display_name":null,"funder_award_id":"61104104","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4620896478","display_name":null,"funder_award_id":"NCET-13-0091","funder_id":"https://openalex.org/F4320334924","funder_display_name":"Program for New Century Excellent Talents in University"},{"id":"https://openalex.org/G5158039713","display_name":null,"funder_award_id":"21YYJC0469","funder_id":"https://openalex.org/F4320333335","funder_display_name":"Sichuan Province Science and Technology Support Program"},{"id":"https://openalex.org/G5549161777","display_name":null,"funder_award_id":"2020YFSY0012","funder_id":"https://openalex.org/F4320333335","funder_display_name":"Sichuan Province Science and Technology Support Program"},{"id":"https://openalex.org/G8415611289","display_name":null,"funder_award_id":"71503206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320333335","display_name":"Sichuan Province Science and Technology Support Program","ror":null},{"id":"https://openalex.org/F4320334924","display_name":"Program for New Century Excellent Talents in University","ror":"https://ror.org/01mv9t934"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1662782209","https://openalex.org/W1968908471","https://openalex.org/W1981685742","https://openalex.org/W1982107603","https://openalex.org/W1983267717","https://openalex.org/W1991119064","https://openalex.org/W2005437559","https://openalex.org/W2012451615","https://openalex.org/W2013895638","https://openalex.org/W2019338681","https://openalex.org/W2024303516","https://openalex.org/W2064267250","https://openalex.org/W2065632630","https://openalex.org/W2086977346","https://openalex.org/W2092215165","https://openalex.org/W2099175737","https://openalex.org/W2108383324","https://openalex.org/W2124829859","https://openalex.org/W2129660761","https://openalex.org/W2132858840","https://openalex.org/W2139416664","https://openalex.org/W2139612737","https://openalex.org/W2152753105","https://openalex.org/W2165501837","https://openalex.org/W2165726932","https://openalex.org/W2170899200","https://openalex.org/W2183137222","https://openalex.org/W2227909145","https://openalex.org/W2264554855","https://openalex.org/W2277723519","https://openalex.org/W2320262435","https://openalex.org/W2339745223","https://openalex.org/W2343912161","https://openalex.org/W2345265966","https://openalex.org/W2417709453","https://openalex.org/W2462102501","https://openalex.org/W2529970964","https://openalex.org/W2606378794","https://openalex.org/W2734340801","https://openalex.org/W2769909638","https://openalex.org/W2780814805","https://openalex.org/W2809115421","https://openalex.org/W2903034862","https://openalex.org/W2908445768","https://openalex.org/W2910904511","https://openalex.org/W2913259157","https://openalex.org/W2921163467","https://openalex.org/W2943141190","https://openalex.org/W2945073974","https://openalex.org/W2969456553","https://openalex.org/W2973026380","https://openalex.org/W2979719459","https://openalex.org/W2986211651","https://openalex.org/W3020748148","https://openalex.org/W4214717370","https://openalex.org/W6766694020"],"related_works":["https://openalex.org/W2761624296","https://openalex.org/W4386994694","https://openalex.org/W4388738109","https://openalex.org/W2120878530","https://openalex.org/W1633334157","https://openalex.org/W2047374070","https://openalex.org/W2279240319","https://openalex.org/W2517761895","https://openalex.org/W2129408120","https://openalex.org/W2946379666"],"abstract_inverted_index":{"In":[0,66],"this":[1],"article,":[2],"a":[3,70,102,142],"novel":[4],"reinforcement":[5],"learning":[6,105],"(RL)":[7],"method":[8,37,160],"is":[9,38,48,72,85,111,121,139],"developed":[10,86],"to":[11,53,87,163],"solve":[12],"the":[13,26,63,67,75,95,114,117,125,128,131,149,158,165,168],"optimal":[14,29,132],"tracking":[15,170],"control":[16,30,91,133,171],"problem":[17],"of":[18,57,77,116,148,167],"unknown":[19],"nonlinear":[20],"multiagent":[21],"systems":[22],"(MASs).":[23],"Different":[24],"from":[25,62],"representative":[27],"RL-based":[28],"algorithms,":[31],"an":[32,42,81],"internal":[33,43],"reinforce":[34,44],"Q-learning":[35],"(IrQ-L)":[36],"proposed,":[39],"in":[40,113,141],"which":[41,120],"reward":[45],"(IRR)":[46],"function":[47,79],"introduced":[49],"for":[50],"each":[51],"agent":[52],"improve":[54],"its":[55],"capability":[56],"receiving":[58],"more":[59],"long-term":[60],"information":[61],"local":[64],"environment.":[65],"IrQL":[68,83],"designs,":[69],"Q-function":[71],"defined":[73],"on":[74],"basis":[76],"IRR":[78,126],"and":[80,98,130,154],"iterative":[82],"algorithm":[84],"learn":[88],"optimally":[89],"distributed":[90,103],"scheme,":[92,134],"followed":[93],"by":[94],"rigorous":[96],"convergence":[97],"stability":[99],"analysis.":[100],"Furthermore,":[101],"online":[104],"framework,":[106],"namely,":[107],"reinforce-critic-actor":[108],"neural":[109],"networks,":[110],"established":[112],"implementation":[115],"proposed":[118,169],"approach,":[119],"aimed":[122],"at":[123],"estimating":[124],"function,":[127],"Q-function,":[129],"respectively.":[135],"The":[136],"implemented":[137],"procedure":[138],"designed":[140],"data-driven":[143],"way":[144],"without":[145],"needing":[146],"knowledge":[147],"system":[150],"dynamics.":[151],"Finally,":[152],"simulations":[153],"comparison":[155],"results":[156],"with":[157],"classical":[159],"are":[161],"given":[162],"demonstrate":[164],"effectiveness":[166],"method.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":20},{"year":2023,"cited_by_count":29},{"year":2022,"cited_by_count":27},{"year":2021,"cited_by_count":9}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
