{"id":"https://openalex.org/W3212756364","doi":"https://doi.org/10.1109/tsp.2021.3128723","title":"Finite-Time Error Bounds of Biased Stochastic Approximation With Application to TD-Learning","display_name":"Finite-Time Error Bounds of Biased Stochastic Approximation With Application to TD-Learning","publication_year":2021,"publication_date":"2021-11-17","ids":{"openalex":"https://openalex.org/W3212756364","doi":"https://doi.org/10.1109/tsp.2021.3128723","mag":"3212756364"},"language":"en","primary_location":{"id":"doi:10.1109/tsp.2021.3128723","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsp.2021.3128723","pdf_url":null,"source":{"id":"https://openalex.org/S168680287","display_name":"IEEE Transactions on Signal Processing","issn_l":"1053-587X","issn":["1053-587X","1941-0476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100367403","display_name":"Gang Wang","orcid":"https://orcid.org/0000-0002-7266-2412"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I50632499","display_name":"Chongqing University of Technology","ror":"https://ror.org/04vgbd477","country_code":"CN","type":"education","lineage":["https://openalex.org/I50632499"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Gang Wang","raw_affiliation_strings":["State Key Lab of Intelligent Control and Decision of Complex Systems and the School of Automation, Beijing Institute of Technology, Beijing, China","Beijing Institute of Technology Chongqing Innovation Center, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Intelligent Control and Decision of Complex Systems and the School of Automation, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Technology Chongqing Innovation Center, Chongqing, China","institution_ids":["https://openalex.org/I50632499","https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100367403"],"corresponding_institution_ids":["https://openalex.org/I125839683","https://openalex.org/I50632499"],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.69636677,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"70","issue":null,"first_page":"950","last_page":"962"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stochastic-approximation","display_name":"Stochastic approximation","score":0.6197560429573059},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5872271656990051},{"id":"https://openalex.org/keywords/ergodicity","display_name":"Ergodicity","score":0.5714494585990906},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.568955659866333},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5463869571685791},{"id":"https://openalex.org/keywords/iterated-function","display_name":"Iterated function","score":0.5406417846679688},{"id":"https://openalex.org/keywords/sublinear-function","display_name":"Sublinear function","score":0.5197715163230896},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.5176742076873779},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.4766588807106018},{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.4641130566596985},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4176880419254303},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3897733688354492},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.38786715269088745},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3656062185764313},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.1717812716960907},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14661049842834473},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1278688907623291},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.09834128618240356}],"concepts":[{"id":"https://openalex.org/C55479107","wikidata":"https://www.wikidata.org/wiki/Q97663916","display_name":"Stochastic approximation","level":3,"score":0.6197560429573059},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5872271656990051},{"id":"https://openalex.org/C201779956","wikidata":"https://www.wikidata.org/wiki/Q5426803","display_name":"Ergodicity","level":2,"score":0.5714494585990906},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.568955659866333},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5463869571685791},{"id":"https://openalex.org/C140479938","wikidata":"https://www.wikidata.org/wiki/Q5254619","display_name":"Iterated function","level":2,"score":0.5406417846679688},{"id":"https://openalex.org/C117160843","wikidata":"https://www.wikidata.org/wiki/Q338652","display_name":"Sublinear function","level":2,"score":0.5197715163230896},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5176742076873779},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.4766588807106018},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.4641130566596985},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4176880419254303},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3897733688354492},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.38786715269088745},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3656062185764313},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.1717812716960907},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14661049842834473},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1278688907623291},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.09834128618240356},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsp.2021.3128723","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsp.2021.3128723","pdf_url":null,"source":{"id":"https://openalex.org/S168680287","display_name":"IEEE Transactions on Signal Processing","issn_l":"1053-587X","issn":["1053-587X","1941-0476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2059183635","display_name":null,"funder_award_id":"61925303","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2575069074","display_name":null,"funder_award_id":"62088101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2923177588","display_name":null,"funder_award_id":"62173034","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6840916361","display_name":null,"funder_award_id":"2021ZX4100027","funder_id":"https://openalex.org/F4320323172","funder_display_name":"Natural Science Foundation of Chongqing"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323172","display_name":"Natural Science Foundation of Chongqing","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1646707810","https://openalex.org/W1941445455","https://openalex.org/W1994616650","https://openalex.org/W2030019188","https://openalex.org/W2103034645","https://openalex.org/W2139418546","https://openalex.org/W2145339207","https://openalex.org/W2885208219","https://openalex.org/W2885549115","https://openalex.org/W2892060576","https://openalex.org/W2897742256","https://openalex.org/W2912171307","https://openalex.org/W2951777528","https://openalex.org/W2963333577","https://openalex.org/W2963616027","https://openalex.org/W2964123095","https://openalex.org/W2969218621","https://openalex.org/W2981237928","https://openalex.org/W2991859550","https://openalex.org/W3021312661","https://openalex.org/W3021959079","https://openalex.org/W3024414306","https://openalex.org/W3034190797","https://openalex.org/W3037764089","https://openalex.org/W3041202696","https://openalex.org/W3106022061","https://openalex.org/W3123661679","https://openalex.org/W3130631752","https://openalex.org/W3146077118","https://openalex.org/W3153673236","https://openalex.org/W3173526354","https://openalex.org/W3185561982","https://openalex.org/W3205579016","https://openalex.org/W3206576350","https://openalex.org/W4211192810","https://openalex.org/W4224862297","https://openalex.org/W4233061323","https://openalex.org/W4238852866","https://openalex.org/W4297329999","https://openalex.org/W4302617909","https://openalex.org/W4381327649","https://openalex.org/W6680307681","https://openalex.org/W6752164047","https://openalex.org/W6754775355","https://openalex.org/W6757967233","https://openalex.org/W6758937863","https://openalex.org/W6763578770","https://openalex.org/W6771753067","https://openalex.org/W6780179245","https://openalex.org/W6785824224","https://openalex.org/W6790721003","https://openalex.org/W6792279347","https://openalex.org/W6802536806"],"related_works":["https://openalex.org/W2062906109","https://openalex.org/W2979432606","https://openalex.org/W2602600938","https://openalex.org/W3119329039","https://openalex.org/W2603269483","https://openalex.org/W4306703873","https://openalex.org/W2964123095","https://openalex.org/W1553308207","https://openalex.org/W3119645134","https://openalex.org/W2772472730"],"abstract_inverted_index":{"Motivated":[0],"by":[1,110],"the":[2,26,45,52,60,63,74,78,120,133,139,146,154],"recent":[3],"success":[4],"of":[5,14,62,80,89,96,135,181],"reinforcement":[6],"learning":[7],"algorithms,":[8],"this":[9],"paper":[10],"studies":[11],"a":[12,21,32,56,94],"class":[13],"biased":[15,90],"stochastic":[16,46,97],"approximation":[17],"(SA)":[18],"procedures":[19],"under":[20,93,119,167],"mild":[22],"&#x201C;ergodicity-like&#x201D;":[23],"assumption":[24],"on":[25,31,59,73],"random":[27],"noise":[28],"sequence.":[29],"Building":[30],"multistep":[33],"Lyapunov":[34],"function":[35,117],"that":[36,141],"looks":[37],"ahead":[38],"to":[39,43,69,113,153,184,190],"several":[40],"future":[41],"updates":[42],"accommodate":[44],"perturbations":[47,98],"(thus":[48],"gaining":[49],"control":[50],"over":[51],"bias),":[53],"we":[54,106],"prove":[55],"general":[57],"result":[58],"convergence":[61],"SA":[64,91],"iterates,":[65],"and":[66,171],"use":[67],"it":[68,142],"derive":[70],"non-asymptotic":[71],"bounds":[72,109],"mean-square":[75],"error":[76,128],"in":[77,138],"case":[79],"constant":[81],"stepsizes.":[82],"This":[83],"novel":[84],"viewpoint":[85],"renders":[86],"finite-time":[87,127],"analysis":[88],"algorithms":[92],"family":[95],"possible.":[99],"For":[100],"direct":[101],"comparison":[102],"with":[103,115],"prior":[104],"work,":[105],"demonstrate":[107],"these":[108],"applying":[111],"them":[112],"TD-learning":[114,131],"linear":[116],"approximation,":[118],"Markov":[121,164],"chain":[122],"observation":[123],"model.":[124],"The":[125],"resultant":[126],"bound":[129],"for":[130,145,163,187],"is":[132],"first":[134],"its":[136],"kind,":[137],"sense":[140],"holds":[143],"i)":[144],"unmodified":[147],"versions":[148],"(i.e.,":[149],"without":[150],"any":[151,175],"modification":[152],"updates)":[155],"using":[156],"even":[157],"nonlinear":[158],"approximators;":[159],"as":[160,162],"well":[161],"chains":[165],"ii)":[166],"sublinear":[168],"mixing":[169],"conditions":[170],"iii)":[172],"starting":[173],"from":[174],"initial":[176],"distribution,":[177],"at":[178],"least":[179],"one":[180],"which":[182],"has":[183],"be":[185,191],"violated":[186],"existing":[188],"results":[189],"applicable.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
