{"id":"https://openalex.org/W4390938885","doi":"https://doi.org/10.1109/tac.2024.3355326","title":"Final Iteration Convergence Bound of Q-Learning: Switching System Approach","display_name":"Final Iteration Convergence Bound of Q-Learning: Switching System Approach","publication_year":2024,"publication_date":"2024-01-17","ids":{"openalex":"https://openalex.org/W4390938885","doi":"https://doi.org/10.1109/tac.2024.3355326"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2024.3355326","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tac.2024.3355326","pdf_url":"https://ieeexplore.ieee.org/ielx7/9/4601496/10402068.pdf","source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ieeexplore.ieee.org/ielx7/9/4601496/10402068.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100654317","display_name":"Donghwan Lee","orcid":"https://orcid.org/0000-0002-4962-8478"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Donghwan Lee","raw_affiliation_strings":["Department of Electrical Engineering, KAIST, Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, KAIST, Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100654317"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":0.9058,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69117607,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"69","issue":"7","first_page":"4765","last_page":"4772"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10323","display_name":"Analog and Mixed-Signal Circuit Design","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10323","display_name":"Analog and Mixed-Signal Circuit Design","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10621","display_name":"Gene Regulatory Network Analysis","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9682999849319458,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6926925182342529},{"id":"https://openalex.org/keywords/iterated-function","display_name":"Iterated function","score":0.6926174163818359},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.6775227785110474},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6350593566894531},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.6060435771942139},{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative learning control","score":0.5928187370300293},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5826747417449951},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.5704105496406555},{"id":"https://openalex.org/keywords/probably-approximately-correct-learning","display_name":"Probably approximately correct learning","score":0.4624815881252289},{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.46002545952796936},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.42764484882354736},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3747730553150177},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3680298924446106},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35940366983413696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2513759136199951},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.21447011828422546},{"id":"https://openalex.org/keywords/computational-learning-theory","display_name":"Computational learning theory","score":0.18875688314437866},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.12819337844848633}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6926925182342529},{"id":"https://openalex.org/C140479938","wikidata":"https://www.wikidata.org/wiki/Q5254619","display_name":"Iterated function","level":2,"score":0.6926174163818359},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.6775227785110474},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6350593566894531},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.6060435771942139},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.5928187370300293},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5826747417449951},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.5704105496406555},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.4624815881252289},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.46002545952796936},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.42764484882354736},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3747730553150177},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3680298924446106},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35940366983413696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2513759136199951},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.21447011828422546},{"id":"https://openalex.org/C50292564","wikidata":"https://www.wikidata.org/wiki/Q2462783","display_name":"Computational learning theory","level":3,"score":0.18875688314437866},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.12819337844848633},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2024.3355326","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tac.2024.3355326","pdf_url":"https://ieeexplore.ieee.org/ielx7/9/4601496/10402068.pdf","source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/tac.2024.3355326","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tac.2024.3355326","pdf_url":"https://ieeexplore.ieee.org/ielx7/9/4601496/10402068.pdf","source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G463275642","display_name":null,"funder_award_id":"2022-0-00469","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G4700831490","display_name":null,"funder_award_id":"2022-","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G4767197758","display_name":null,"funder_award_id":"2022-0-00469","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G5824047699","display_name":null,"funder_award_id":"2022-0-0046","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390938885.pdf","grobid_xml":"https://content.openalex.org/works/W4390938885.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1499021337","https://openalex.org/W1534096605","https://openalex.org/W1552094772","https://openalex.org/W1999254175","https://openalex.org/W2062974796","https://openalex.org/W2071983464","https://openalex.org/W2077343054","https://openalex.org/W2145339207","https://openalex.org/W2150147323","https://openalex.org/W2151661095","https://openalex.org/W2291973609","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2973229164","https://openalex.org/W3009585715","https://openalex.org/W3041202696","https://openalex.org/W3127825666","https://openalex.org/W3207876914","https://openalex.org/W4214717370","https://openalex.org/W4233061323","https://openalex.org/W4233696721","https://openalex.org/W4251293356","https://openalex.org/W4288351606","https://openalex.org/W4382064684","https://openalex.org/W6638018090","https://openalex.org/W6677067356","https://openalex.org/W6677826109","https://openalex.org/W6678494045","https://openalex.org/W6683300800","https://openalex.org/W6685444567","https://openalex.org/W6696783566","https://openalex.org/W6741002519","https://openalex.org/W6743500281","https://openalex.org/W6763048320","https://openalex.org/W6773637655","https://openalex.org/W6774445294","https://openalex.org/W6785894376","https://openalex.org/W6789785855"],"related_works":["https://openalex.org/W2761624296","https://openalex.org/W4386994694","https://openalex.org/W4388738109","https://openalex.org/W2362901947","https://openalex.org/W2362086884","https://openalex.org/W1606071314","https://openalex.org/W2473609169","https://openalex.org/W2329573185","https://openalex.org/W4390938885","https://openalex.org/W2149166950"],"abstract_inverted_index":{"Q-learning":[0,35,49,150,184],"is":[1,115,120],"known":[2],"as":[3,50,123],"one":[4],"of":[5,18,48,84,113,133,149,201],"the":[6,16,22,46,65,78,85,91,102,106,110,131,146,153,166,177],"fundamental":[7],"reinforcement":[8],"learning":[9],"(RL)":[10],"algorithms.":[11,135,205],"Its":[12],"convergence":[13,103],"has":[14],"been":[15],"focus":[17],"extensive":[19],"research":[20],"over":[21],"past":[23],"several":[24],"decades.":[25],"Recently,":[26],"a":[27,39,51,60,124,141,195],"new":[28,196],"finite-time":[29,61,142,199],"error":[30,62,82,143,159],"bound":[31,63,144],"and":[32,119,127,169,191],"analysis":[33,79,200],"for":[34,198],"was":[36],"introduced":[37],"using":[38,68],"switching":[40,54,154,189],"system":[41,155],"framework.":[42,156],"This":[43],"approach":[44],"views":[45],"dynamics":[47],"discrete-time":[52,188],"stochastic":[53],"system.":[55],"The":[56,157],"prior":[57],"study":[58],"established":[59],"on":[64,81,145,152,183],"averaged":[66,86],"iterates":[67],"Lyapunov":[69],"functions,":[70],"offering":[71],"further":[72],"insights":[73,182],"into":[74],"Q-learning.":[75],"While":[76],"valuable,":[77],"focuses":[80],"bounds":[83,160],"iterate,":[87,108],"which":[88,99],"comes":[89],"with":[90,187],"inherent":[92],"disadvantages:":[93],"it":[94],"necessitates":[95],"extra":[96],"averaging":[97],"steps,":[98],"can":[100,192],"decelerate":[101],"rate.":[104],"Moreover,":[105],"final":[107,147],"being":[109],"original":[111],"format":[112],"Q-learning,":[114],"more":[116,125,202],"commonly":[117],"used":[118],"often":[121],"regarded":[122],"intuitive":[126],"natural":[128],"form":[129],"in":[130],"majority":[132],"iterative":[134],"In":[136],"this":[137],"paper,":[138],"we":[139,174],"present":[140,194],"iterate":[148],"based":[151],"proposed":[158,178],"have":[161],"different":[162,171],"features":[163],"compared":[164],"to":[165],"previous":[167],"works,":[168],"cover":[170],"scenarios.":[172],"Finally,":[173],"expect":[175],"that":[176],"results":[179],"provide":[180],"additional":[181],"via":[185],"connections":[186],"systems,":[190],"potentially":[193],"template":[197],"general":[203],"RL":[204]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
