{"id":"https://openalex.org/W2052688942","doi":"https://doi.org/10.1109/tnnls.2014.2371046","title":"MEC\u2014A Near-Optimal Online Reinforcement Learning Algorithm for Continuous Deterministic Systems","display_name":"MEC\u2014A Near-Optimal Online Reinforcement Learning Algorithm for Continuous Deterministic Systems","publication_year":2014,"publication_date":"2014-12-02","ids":{"openalex":"https://openalex.org/W2052688942","doi":"https://doi.org/10.1109/tnnls.2014.2371046","mag":"2052688942","pmid":"https://pubmed.ncbi.nlm.nih.gov/25474812"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2014.2371046","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2014.2371046","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","State Key laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China#TAB#"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China#TAB#","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080193690","display_name":"Yuanheng Zhu","orcid":"https://orcid.org/0000-0001-5384-423X"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanheng Zhu","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","State Key laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China#TAB#"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China#TAB#","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100624298"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":11.7507,"has_fulltext":false,"cited_by_count":79,"citation_normalized_percentile":{"value":0.98927744,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"26","issue":"2","first_page":"346","last_page":"356"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.660824179649353},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6441363096237183},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.6020896434783936},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.6016918420791626},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5660337209701538},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.5367765426635742},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5255792737007141},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5223183035850525},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.4840906858444214},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.46718767285346985},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4640933871269226},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.4244271218776703},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.4221121370792389},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.39487871527671814},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3221200108528137},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2338525652885437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.123898446559906}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.660824179649353},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6441363096237183},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.6020896434783936},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.6016918420791626},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5660337209701538},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.5367765426635742},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5255792737007141},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5223183035850525},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.4840906858444214},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.46718767285346985},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4640933871269226},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.4244271218776703},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.4221121370792389},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39487871527671814},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3221200108528137},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2338525652885437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.123898446559906},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2014.2371046","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2014.2371046","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:25474812","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/25474812","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1731450854","display_name":null,"funder_award_id":"61273136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2015575040","display_name":null,"funder_award_id":"4122083","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G5605584058","display_name":null,"funder_award_id":"61034002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W21891419","https://openalex.org/W65193931","https://openalex.org/W1504212531","https://openalex.org/W1584101032","https://openalex.org/W1626155273","https://openalex.org/W1747856733","https://openalex.org/W1974883306","https://openalex.org/W1976256897","https://openalex.org/W1983089547","https://openalex.org/W1986078872","https://openalex.org/W1989855774","https://openalex.org/W1997603681","https://openalex.org/W1999035253","https://openalex.org/W1999678919","https://openalex.org/W2014729048","https://openalex.org/W2018547649","https://openalex.org/W2027029581","https://openalex.org/W2030038728","https://openalex.org/W2035018355","https://openalex.org/W2048687352","https://openalex.org/W2068949505","https://openalex.org/W2081758070","https://openalex.org/W2082691056","https://openalex.org/W2086297186","https://openalex.org/W2087400813","https://openalex.org/W2098035803","https://openalex.org/W2106907982","https://openalex.org/W2111980775","https://openalex.org/W2116488102","https://openalex.org/W2117056304","https://openalex.org/W2121863487","https://openalex.org/W2129670787","https://openalex.org/W2132858840","https://openalex.org/W2133120480","https://openalex.org/W2134540127","https://openalex.org/W2134569556","https://openalex.org/W2139416664","https://openalex.org/W2150923691","https://openalex.org/W2159571856","https://openalex.org/W2171754584","https://openalex.org/W2468519215","https://openalex.org/W2476930474","https://openalex.org/W2489939061","https://openalex.org/W4214717370","https://openalex.org/W4285719527","https://openalex.org/W6600885592","https://openalex.org/W6602690695","https://openalex.org/W6637888334","https://openalex.org/W6675945767","https://openalex.org/W6682443896","https://openalex.org/W6720036336"],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W1531601525","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W3009457412","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W1598028160"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"the":[3,24,29,47,85,107,114],"first":[4],"probably":[5],"approximately":[6],"correct":[7],"(PAC)":[8],"algorithm":[9],"for":[10],"continuous":[11,48],"deterministic":[12],"systems":[13,124],"without":[14],"relying":[15],"on":[16],"any":[17],"system":[18],"dynamics":[19],"is":[20,61,92,135],"proposed.":[21],"It":[22],"combines":[23],"state":[25,49],"aggregation":[26],"technique":[27],"and":[28,33,82,125],"efficient":[30],"exploration":[31,81],"principle,":[32],"makes":[34],"high":[35],"utilization":[36],"of":[37,97,116,123],"online":[38],"observed":[39],"samples.":[40,56],"We":[41],"use":[42],"a":[43,65,93,136],"grid":[44],"to":[45,54,63],"partition":[46],"space":[50],"into":[51],"different":[52],"cells":[53],"save":[55],"A":[57],"near-upper":[58,66],"Q":[59,67],"operator":[60],"defined":[62],"produce":[64],"function":[68],"using":[69],"samples":[70],"in":[71,101,113],"each":[72],"cell.":[73],"The":[74,118],"corresponding":[75],"greedy":[76],"policy":[77,109],"effectively":[78],"balances":[79],"between":[80],"exploitation.":[83],"With":[84],"rigorous":[86],"analysis,":[87],"we":[88],"prove":[89],"that":[90,133],"there":[91],"polynomial":[94],"time":[95],"bound":[96],"executing":[98],"nonoptimal":[99],"actions":[100],"our":[102],"algorithm.":[103],"After":[104],"finite":[105],"steps,":[106],"final":[108],"reaches":[110],"near":[111],"optimal":[112],"framework":[115],"PAC.":[117],"implementation":[119],"requires":[120],"no":[121],"knowledge":[122],"has":[126],"less":[127],"computation":[128],"complexity.":[129],"Simulation":[130],"studies":[131],"confirm":[132],"it":[134],"better":[137],"performance":[138],"than":[139],"other":[140],"similar":[141],"PAC":[142],"algorithms.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":17},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":14},{"year":2015,"cited_by_count":9}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
