{"id":"https://openalex.org/W3154290186","doi":"https://doi.org/10.1109/tnnls.2021.3070852","title":"Adaptive Observation-Based Efficient Reinforcement Learning for Uncertain Systems","display_name":"Adaptive Observation-Based Efficient Reinforcement Learning for Uncertain Systems","publication_year":2021,"publication_date":"2021-04-16","ids":{"openalex":"https://openalex.org/W3154290186","doi":"https://doi.org/10.1109/tnnls.2021.3070852","mag":"3154290186","pmid":"https://pubmed.ncbi.nlm.nih.gov/33861708"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3070852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3070852","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082230080","display_name":"Maopeng Ran","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Maopeng Ran","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100365448","display_name":"Lihua Xie","orcid":"https://orcid.org/0000-0002-7137-4136"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Lihua Xie","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5082230080"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":2.1205,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.88703278,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"33","issue":"10","first_page":"5492","last_page":"5503"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8477290868759155},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7243061661720276},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6327584981918335},{"id":"https://openalex.org/keywords/observer","display_name":"Observer (physics)","score":0.6075283288955688},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5707215666770935},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5050562024116516},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.49691155552864075},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4728161096572876},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.433803915977478},{"id":"https://openalex.org/keywords/ideal","display_name":"Ideal (ethics)","score":0.41964471340179443},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.25701332092285156},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24543240666389465},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1875639259815216},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.17747873067855835}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8477290868759155},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7243061661720276},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6327584981918335},{"id":"https://openalex.org/C2780704645","wikidata":"https://www.wikidata.org/wiki/Q9251458","display_name":"Observer (physics)","level":2,"score":0.6075283288955688},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5707215666770935},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5050562024116516},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.49691155552864075},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4728161096572876},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.433803915977478},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.41964471340179443},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25701332092285156},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24543240666389465},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1875639259815216},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.17747873067855835},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003198","descriptor_name":"Computer Simulation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2021.3070852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3070852","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:33861708","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33861708","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6968088389","display_name":null,"funder_award_id":"61720106011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W5108103","https://openalex.org/W132264163","https://openalex.org/W560518094","https://openalex.org/W648594167","https://openalex.org/W1540245649","https://openalex.org/W1907796993","https://openalex.org/W1914756871","https://openalex.org/W1983523797","https://openalex.org/W1998361253","https://openalex.org/W2003233927","https://openalex.org/W2024303516","https://openalex.org/W2067603665","https://openalex.org/W2073591687","https://openalex.org/W2073990179","https://openalex.org/W2085194340","https://openalex.org/W2086975818","https://openalex.org/W2100325486","https://openalex.org/W2106420625","https://openalex.org/W2107726111","https://openalex.org/W2111034649","https://openalex.org/W2113501460","https://openalex.org/W2148439597","https://openalex.org/W2163114046","https://openalex.org/W2467518411","https://openalex.org/W2562093128","https://openalex.org/W2727279496","https://openalex.org/W2766918150","https://openalex.org/W2767784613","https://openalex.org/W2772589676","https://openalex.org/W2780814805","https://openalex.org/W2794243919","https://openalex.org/W2795039172","https://openalex.org/W2802027610","https://openalex.org/W2885709261","https://openalex.org/W2888496832","https://openalex.org/W2911032305","https://openalex.org/W2935829912","https://openalex.org/W2979961302","https://openalex.org/W2996000010","https://openalex.org/W3003040443","https://openalex.org/W3004169446","https://openalex.org/W3016201194","https://openalex.org/W3034456914","https://openalex.org/W3103456419","https://openalex.org/W6804409665"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W3087814763","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W4376605461","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2952356279"],"abstract_inverted_index":{"This":[0,36],"article":[1],"develops":[2],"an":[3],"adaptive":[4,21],"observation-based":[5],"efficient":[6],"reinforcement":[7],"learning":[8,20,60],"(RL)":[9],"approach":[10],"for":[11,77],"systems":[12],"with":[13],"uncertain":[14],"drift":[15],"dynamics.":[16],"A":[17],"novel":[18],"concurrent":[19,59],"extended":[22],"observer":[23,37],"(CL-AEO)":[24],"is":[25,62,100,112],"first":[26],"designed":[27],"to":[28,50,64,71,102,114,124,131],"jointly":[29],"estimate":[30],"the":[31,52,66,78,85,92,105,117,121,125,128,132,140,147,152],"system":[32,122],"state":[33,53,87,123],"and":[34,42,88,127,149],"parameter.":[35],"has":[38],"a":[39,72,94],"two-time-scale":[40],"structure":[41],"does":[43],"not":[44],"require":[45],"any":[46],"additional":[47],"numerical":[48],"techniques":[49],"calculate":[51],"derivative":[54],"information.":[55],"The":[56],"idea":[57],"of":[58,80,96,120,142,151],"(CL)":[61],"leveraged":[63],"use":[65],"recorded":[67],"data,":[68],"which":[69],"leads":[70],"relaxed":[73],"verifiable":[74],"excitation":[75,143],"condition":[76],"convergence":[79,119],"parameter":[81,89],"estimation.":[82],"Based":[83],"on":[84],"estimated":[86],"provided":[90],"by":[91],"CL-AEO,":[93],"simulation":[95],"experience-based":[97],"RL":[98],"scheme":[99],"developed":[101,129,153],"online":[103],"approximate":[104],"optimal":[106,134],"control":[107],"policy.":[108],"Rigorous":[109],"theoretical":[110],"analysis":[111],"given":[113],"show":[115],"that":[116],"practical":[118],"origin":[126],"policy":[130,135],"ideal":[133],"can":[136],"be":[137],"achieved":[138],"without":[139],"persistence":[141],"(PE)":[144],"condition.":[145],"Finally,":[146],"effectiveness":[148],"superiority":[150],"methodology":[154],"are":[155],"demonstrated":[156],"via":[157],"comparative":[158],"simulations.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":5}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
