{"id":"https://openalex.org/W2139025507","doi":"https://doi.org/10.1109/tac.2018.2874687","title":"Stability of Stochastic Approximations With \u201cControlled Markov\u201d Noise and Temporal Difference Learning","display_name":"Stability of Stochastic Approximations With \u201cControlled Markov\u201d Noise and Temporal Difference Learning","publication_year":2018,"publication_date":"2018-10-09","ids":{"openalex":"https://openalex.org/W2139025507","doi":"https://doi.org/10.1109/tac.2018.2874687","mag":"2139025507"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2018.2874687","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2018.2874687","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1504.06043","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086784011","display_name":"Arunselvan Ramaswamy","orcid":"https://orcid.org/0000-0001-7547-8111"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Arunselvan Ramaswamy","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#"],"raw_orcid":"https://orcid.org/0000-0001-7547-8111","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038163398","display_name":"Shalabh Bhatnagar","orcid":"https://orcid.org/0000-0001-7644-3914"},"institutions":[{"id":"https://openalex.org/I4210151956","display_name":"Robert Bosch (India)","ror":"https://ror.org/04my8ty22","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210151956","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shalabh Bhatnagar","raw_affiliation_strings":["Department of Computer Science and Auto-mation and the Robert Bosch Centre for Cyber Physical Systems, Indian Institute of Science, Bangalore, India"],"raw_orcid":"https://orcid.org/0000-0001-7644-3914","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Auto-mation and the Robert Bosch Centre for Cyber Physical Systems, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I4210151956","https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0035108,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"64","issue":"6","first_page":"2614","last_page":"2620"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8031659126281738},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6507096290588379},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6491319537162781},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6340035200119019},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5907789468765259},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.585068941116333},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.573377251625061},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5685912370681763},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5483832359313965},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.47588908672332764},{"id":"https://openalex.org/keywords/ergodic-theory","display_name":"Ergodic theory","score":0.44984930753707886},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4257556200027466},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4126529097557068},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.35276728868484497},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.30454903841018677},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06641161441802979}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8031659126281738},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6507096290588379},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6491319537162781},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6340035200119019},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5907789468765259},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.585068941116333},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.573377251625061},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5685912370681763},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5483832359313965},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.47588908672332764},{"id":"https://openalex.org/C122044880","wikidata":"https://www.wikidata.org/wiki/Q5498822","display_name":"Ergodic theory","level":2,"score":0.44984930753707886},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4257556200027466},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4126529097557068},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35276728868484497},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30454903841018677},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06641161441802979},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/tac.2018.2874687","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2018.2874687","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},{"id":"pmh:oai:eprints.iisc.ac.in:63036","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196309","display_name":"NOT FOUND REPOSITORY (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:arXiv.org:1504.06043","is_oa":true,"landing_page_url":"https://arxiv.org/abs/1504.06043","pdf_url":"https://arxiv.org/pdf/1504.06043","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2139025507","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1504.06043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1504.06043","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1504.06043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1504.06043","is_oa":true,"landing_page_url":"https://arxiv.org/abs/1504.06043","pdf_url":"https://arxiv.org/pdf/1504.06043","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7162391439","display_name":null,"funder_award_id":"315248657","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320310071","display_name":"Indian Institute of Science","ror":"https://ror.org/04dese585"},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2139025507.pdf","grobid_xml":"https://content.openalex.org/works/W2139025507.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W594357522","https://openalex.org/W1491706803","https://openalex.org/W1514218977","https://openalex.org/W1568229137","https://openalex.org/W1576452626","https://openalex.org/W1964757264","https://openalex.org/W1967049706","https://openalex.org/W1991667331","https://openalex.org/W1997394097","https://openalex.org/W2064076655","https://openalex.org/W2067102124","https://openalex.org/W2071983464","https://openalex.org/W2125812768","https://openalex.org/W2139418546","https://openalex.org/W2962716894","https://openalex.org/W2964344217","https://openalex.org/W3102716280","https://openalex.org/W4210718478","https://openalex.org/W4243772471"],"related_works":["https://openalex.org/W2962741973","https://openalex.org/W3135895062","https://openalex.org/W2116997137","https://openalex.org/W2102195169","https://openalex.org/W1604547262","https://openalex.org/W2591204034","https://openalex.org/W2099724000","https://openalex.org/W2031440490","https://openalex.org/W3005705379","https://openalex.org/W2951582544","https://openalex.org/W3037807764","https://openalex.org/W2103935902","https://openalex.org/W3109048725","https://openalex.org/W2121446922","https://openalex.org/W19265801","https://openalex.org/W1529167716","https://openalex.org/W3113245028","https://openalex.org/W2738771169","https://openalex.org/W2922301831","https://openalex.org/W3195054069"],"abstract_inverted_index":{"We":[0],"are":[1],"interested":[2],"in":[3,103,131],"understanding":[4],"stability":[5,54,77],"(almost":[6],"sure":[7],"boundedness)":[8],"of":[9,23,57,146,158],"stochastic":[10],"approximation":[11],"algorithms":[12,24,32],"(SAs)":[13],"driven":[14,38,59],"by":[15,39,60],"a":[16,40,61,94,104,155],"\u201ccontrolled":[17,41,62],"Markov\u201d":[18,42,63],"process.":[19,43,64],"Analyzing":[20],"this":[21,45],"class":[22],"is":[25,125,137,150],"important,":[26],"since":[27],"many":[28],"reinforcement":[29],"learning":[30,124,160],"(RL)":[31],"can":[33],"be":[34,114],"cast":[35],"as":[36],"SAs":[37,58],"In":[44],"paper,":[46],"we":[47],"present":[48],"easily":[49],"verifiable":[50],"sufficient":[51],"conditions":[52],"for":[53,78,161],"and":[55,108],"convergence":[56],"Many":[65],"RL":[66],"applications":[67],"involve":[68],"continuous":[69,80,105],"state":[70,81,106],"spaces.":[71],"While":[72],"our":[73,91],"analysis":[74,92],"readily":[75],"ensures":[76],"such":[79],"applications,":[82],"traditional":[83],"analyses":[84],"do":[85],"not.":[86],"As":[87],"compared":[88],"to":[89,139,153],"literature,":[90],"presents":[93],"two-fold":[95],"generalization:":[96],"1)":[97],"the":[98,110],"Markov":[99],"process":[100,111],"may":[101],"evolve":[102],"space":[107],"2)":[109],"need":[112],"not":[113],"ergodic":[115],"under":[116],"any":[117],"given":[118],"stationary":[119],"policy.":[120],"Temporal":[121],"difference":[122],"(TD)":[123],"an":[126,143],"important":[127,144],"policy":[128],"evaluation":[129],"method":[130],"RL.":[132],"The":[133],"theory":[134,149],"developed":[135],"herein,":[136],"used":[138,152],"analyze":[140,154],"generalized":[141],"$\\text{TD}(0)$,":[142],"variant":[145],"TD.":[147],"Our":[148],"also":[151],"TD":[156],"formulation":[157],"supervised":[159],"forecasting":[162],"problems.":[163]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
