{"id":"https://openalex.org/W2002260889","doi":"https://doi.org/10.1109/tsmcb.2011.2166384","title":"Reinforcement Learning Controller Design for Affine Nonlinear Discrete-Time Systems using Online Approximators","display_name":"Reinforcement Learning Controller Design for Affine Nonlinear Discrete-Time Systems using Online Approximators","publication_year":2011,"publication_date":"2011-09-28","ids":{"openalex":"https://openalex.org/W2002260889","doi":"https://doi.org/10.1109/tsmcb.2011.2166384","mag":"2002260889","pmid":"https://pubmed.ncbi.nlm.nih.gov/21947529"},"language":"en","primary_location":{"id":"doi:10.1109/tsmcb.2011.2166384","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmcb.2011.2166384","pdf_url":null,"source":{"id":"https://openalex.org/S4210170378","display_name":"IEEE Transactions on Systems Man and Cybernetics Part B (Cybernetics)","issn_l":"1083-4419","issn":["1083-4419","1941-0492"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062534500","display_name":"Qinmin Yang","orcid":"https://orcid.org/0000-0002-1602-8986"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I4391767838","display_name":"State Key Laboratory of Industrial Control Technology","ror":"https://ror.org/03a33a786","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391767838","https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qinmin Yang","raw_affiliation_strings":["State Key Laboratory of Industrial Control Technology, Department of Control Science and Engineering, Zhejiang University, Hangzhou 310027, China. qmyang@iipc.zju.edu.cn","Department of Control Science & Engineering, Zhejiang University, Hangzhou, CHINA"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Industrial Control Technology, Department of Control Science and Engineering, Zhejiang University, Hangzhou 310027, China. qmyang@iipc.zju.edu.cn","institution_ids":["https://openalex.org/I55712492","https://openalex.org/I4391767838"]},{"raw_affiliation_string":"Department of Control Science & Engineering, Zhejiang University, Hangzhou, CHINA","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078910343","display_name":"S. Jagannathan","orcid":"https://orcid.org/0000-0002-2310-3737"},"institutions":[{"id":"https://openalex.org/I20382870","display_name":"Missouri University of Science and Technology","ror":"https://ror.org/00scwqd12","country_code":"US","type":"education","lineage":["https://openalex.org/I20382870"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S. Jagannathan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Missouri University of Science and Technology, Rolla, MO, USA","Dept. of Electr. & Comput. Eng., Missouri Univ. of Sci. & Technol., Rolla, MO, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Missouri University of Science and Technology, Rolla, MO, USA","institution_ids":["https://openalex.org/I20382870"]},{"raw_affiliation_string":"Dept. of Electr. & Comput. Eng., Missouri Univ. of Sci. & Technol., Rolla, MO, USA","institution_ids":["https://openalex.org/I20382870"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062534500"],"corresponding_institution_ids":["https://openalex.org/I4391767838","https://openalex.org/I55712492","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":11.371,"has_fulltext":false,"cited_by_count":188,"citation_normalized_percentile":{"value":0.98977029,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"42","issue":"2","first_page":"377","last_page":"390"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10040","display_name":"Adaptive Control of Nonlinear Systems","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.7415807843208313},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.6630051136016846},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.566339910030365},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5602312684059143},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.5213416814804077},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4866669774055481},{"id":"https://openalex.org/keywords/observer","display_name":"Observer (physics)","score":0.47839510440826416},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.47769811749458313},{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.46778616309165955},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4118032455444336},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.3495709300041199},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2966960668563843},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2892768085002899},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2179436981678009},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1349336802959442},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.12111994624137878}],"concepts":[{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.7415807843208313},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.6630051136016846},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.566339910030365},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5602312684059143},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.5213416814804077},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4866669774055481},{"id":"https://openalex.org/C2780704645","wikidata":"https://www.wikidata.org/wiki/Q9251458","display_name":"Observer (physics)","level":2,"score":0.47839510440826416},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.47769811749458313},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.46778616309165955},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4118032455444336},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3495709300041199},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2966960668563843},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2892768085002899},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2179436981678009},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1349336802959442},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.12111994624137878},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tsmcb.2011.2166384","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmcb.2011.2166384","pdf_url":null,"source":{"id":"https://openalex.org/S4210170378","display_name":"IEEE Transactions on Systems Man and Cybernetics Part B (Cybernetics)","issn_l":"1083-4419","issn":["1083-4419","1941-0492"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)","raw_type":"journal-article"},{"id":"pmid:21947529","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/21947529","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on systems, man, and cybernetics. Part B, Cybernetics : a publication of the IEEE Systems, Man, and Cybernetics Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W266470921","https://openalex.org/W1162378864","https://openalex.org/W1517236425","https://openalex.org/W1557517019","https://openalex.org/W1574514837","https://openalex.org/W1854776945","https://openalex.org/W1969166509","https://openalex.org/W1986278072","https://openalex.org/W1995622844","https://openalex.org/W2005229381","https://openalex.org/W2049063292","https://openalex.org/W2059147929","https://openalex.org/W2062533874","https://openalex.org/W2077195478","https://openalex.org/W2090167557","https://openalex.org/W2091565802","https://openalex.org/W2093831009","https://openalex.org/W2098432798","https://openalex.org/W2100677568","https://openalex.org/W2104346286","https://openalex.org/W2115249980","https://openalex.org/W2121863487","https://openalex.org/W2131398727","https://openalex.org/W2136064843","https://openalex.org/W2145830976","https://openalex.org/W2154549708","https://openalex.org/W2160561608","https://openalex.org/W2165501837","https://openalex.org/W3041202696","https://openalex.org/W4214717370","https://openalex.org/W4230466265","https://openalex.org/W4298300677"],"related_works":["https://openalex.org/W962423920","https://openalex.org/W2387968248","https://openalex.org/W2350715914","https://openalex.org/W2120821724","https://openalex.org/W2379529020","https://openalex.org/W2101175215","https://openalex.org/W2893549521","https://openalex.org/W2330311678","https://openalex.org/W2352322616","https://openalex.org/W2574438335"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"reinforcement":[3],"learning":[4],"state-":[5],"and":[6,24,54,94,129,176],"output-feedback-based":[7],"adaptive":[8],"critic":[9,56,67,95],"controller":[10,39,143],"designs":[11],"are":[12,88,145],"proposed":[13,38],"by":[14],"using":[15,76,157],"the":[16,32,60,63,69,92,112,121,125,142,154,161,164],"online":[17,75],"approximators":[18],"(OLAs)":[19],"for":[20,91,141],"a":[21,55,172,177],"general":[22],"multi-input":[23],"multioutput":[25],"affine":[26],"unknown":[27],"nonlinear":[28],"discretetime":[29],"systems":[30],"in":[31,169],"presence":[33],"of":[34,62,153,163],"bounded":[35],"disturbances.":[36],"The":[37,66,136],"design":[40],"has":[41],"two":[42,165],"entities,":[43],"an":[44,115],"action":[45,64,93],"network":[46,57],"that":[47,58],"is":[48,73,118,133,167],"designed":[49],"to":[50,123],"produce":[51],"optimal":[52],"signal":[53],"evaluates":[59],"performance":[61],"network.":[65],"estimates":[68],"cost-to-go":[70],"function":[71],"which":[72],"tuned":[74],"recursive":[77],"equations":[78],"derived":[79,147],"from":[80],"heuristic":[81],"dynamic":[82],"programming.":[83],"Here,":[84],"neural":[85],"networks":[86],"(NNs)":[87],"used":[89],"both":[90],"whereas":[96],"any":[97],"OLAs,":[98],"such":[99],"as":[100,120],"radial":[101],"basis":[102],"functions,":[103],"splines,":[104],"fuzzy":[105],"logic,":[106],"etc.,":[107],"can":[108],"be":[109],"utilized.":[110],"For":[111],"output-feedback":[113],"counterpart,":[114],"additional":[116],"NN":[117,137],"designated":[119],"observer":[122],"estimate":[124],"unavailable":[126],"system":[127,156,175],"states,":[128],"thus,":[130],"separation":[131],"principle":[132],"not":[134],"required.":[135],"weight":[138],"tuning":[139],"laws":[140],"schemes":[144],"also":[146],"while":[148],"ensuring":[149],"uniform":[150],"ultimate":[151],"boundedness":[152],"closed-loop":[155],"Lyapunov":[158],"theory.":[159],"Finally,":[160],"effectiveness":[162],"controllers":[166],"tested":[168],"simulation":[170],"on":[171],"pendulum":[173],"balancing":[174],"two-link":[178],"robotic":[179],"arm":[180],"system.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":20},{"year":2018,"cited_by_count":14},{"year":2017,"cited_by_count":15},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":21},{"year":2013,"cited_by_count":12},{"year":2012,"cited_by_count":2}],"updated_date":"2026-03-03T08:47:05.690250","created_date":"2025-10-10T00:00:00"}
