{"id":"https://openalex.org/W3012451757","doi":"https://doi.org/10.1137/20m1331524","title":"Is Temporal Difference Learning Optimal? An Instance-Dependent Analysis","display_name":"Is Temporal Difference Learning Optimal? An Instance-Dependent Analysis","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3012451757","doi":"https://doi.org/10.1137/20m1331524","mag":"3012451757"},"language":"en","primary_location":{"id":"doi:10.1137/20m1331524","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1331524","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1331524","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1331524","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082704866","display_name":"Koulik Khamaru","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Koulik Khamaru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085869933","display_name":"Ashwin Pananjady","orcid":"https://orcid.org/0000-0003-0824-9815"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ashwin Pananjady","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101587259","display_name":"Feng Ruan","orcid":"https://orcid.org/0000-0002-5175-1491"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng Ruan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038379562","display_name":"Martin J. Wainwright","orcid":"https://orcid.org/0000-0002-8760-2236"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martin J. Wainwright","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049812527","display_name":"Michael I. Jordan","orcid":"https://orcid.org/0000-0001-8935-817X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael I. Jordan","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0001-8935-817X","affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082704866"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.005104,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"3","issue":"4","first_page":"1013","last_page":"1040"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.824327290058136},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7112007737159729},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.7106437683105469},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.5946797728538513},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.565427839756012},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5112645626068115},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.4380652904510498},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.43459534645080566},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4317903220653534},{"id":"https://openalex.org/keywords/fraction","display_name":"Fraction (chemistry)","score":0.4313785433769226},{"id":"https://openalex.org/keywords/asymptotic-analysis","display_name":"Asymptotic analysis","score":0.42848023772239685},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3970143496990204},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3907489478588104},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3439582288265228},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20631033182144165},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13949227333068848}],"concepts":[{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.824327290058136},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7112007737159729},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.7106437683105469},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.5946797728538513},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.565427839756012},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5112645626068115},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.4380652904510498},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.43459534645080566},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4317903220653534},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.4313785433769226},{"id":"https://openalex.org/C205147927","wikidata":"https://www.wikidata.org/wiki/Q752718","display_name":"Asymptotic analysis","level":2,"score":0.42848023772239685},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3970143496990204},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3907489478588104},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3439582288265228},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20631033182144165},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13949227333068848},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1137/20m1331524","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1331524","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1331524","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2003.07337","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.07337","pdf_url":"https://arxiv.org/pdf/2003.07337","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3012451757","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2003.07337.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2003.07337","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2003.07337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1137/20m1331524","is_oa":true,"landing_page_url":"https://doi.org/10.1137/20m1331524","pdf_url":"https://epubs.siam.org/doi/pdf/10.1137/20M1331524","source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G2571710149","display_name":null,"funder_award_id":"DMS-2015454","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4967605941","display_name":null,"funder_award_id":"DMS-1612948","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8485495531","display_name":null,"funder_award_id":"DOD ONR-N00014-18-1-2640","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3012451757.pdf","grobid_xml":"https://content.openalex.org/works/W3012451757.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W59018853","https://openalex.org/W594357522","https://openalex.org/W1494502909","https://openalex.org/W1511694993","https://openalex.org/W1533375765","https://openalex.org/W1576452626","https://openalex.org/W1595541099","https://openalex.org/W1646707810","https://openalex.org/W1777141204","https://openalex.org/W1854990296","https://openalex.org/W1969276875","https://openalex.org/W1992208280","https://openalex.org/W1994616650","https://openalex.org/W2002444764","https://openalex.org/W2049742719","https://openalex.org/W2070086983","https://openalex.org/W2071983464","https://openalex.org/W2086161653","https://openalex.org/W2100677568","https://openalex.org/W2107438106","https://openalex.org/W2120339885","https://openalex.org/W2120678009","https://openalex.org/W2121703796","https://openalex.org/W2121863487","https://openalex.org/W2122701159","https://openalex.org/W2154172448","https://openalex.org/W2165131254","https://openalex.org/W2886712433","https://openalex.org/W2890347272","https://openalex.org/W2907502549","https://openalex.org/W2912099989","https://openalex.org/W2949578685","https://openalex.org/W2963472288","https://openalex.org/W2964054583","https://openalex.org/W2964123095","https://openalex.org/W2964988441","https://openalex.org/W2970553582","https://openalex.org/W2995999665","https://openalex.org/W3045718248","https://openalex.org/W3090442079","https://openalex.org/W3097999317","https://openalex.org/W3098956361","https://openalex.org/W3136903997","https://openalex.org/W3147513489"],"related_works":["https://openalex.org/W3107656456","https://openalex.org/W2965710643","https://openalex.org/W3083060963","https://openalex.org/W2972385320","https://openalex.org/W3194409765","https://openalex.org/W2950419842","https://openalex.org/W64335235","https://openalex.org/W3121423254","https://openalex.org/W2093862975","https://openalex.org/W3025837736","https://openalex.org/W3122123095","https://openalex.org/W3128147460","https://openalex.org/W3136541527","https://openalex.org/W2798989470","https://openalex.org/W2896520257","https://openalex.org/W3013471228","https://openalex.org/W3103182070","https://openalex.org/W2950828668","https://openalex.org/W2947012451","https://openalex.org/W3041957046"],"abstract_inverted_index":{"We":[0,23,74],"address":[1],"the":[2,17,52],"problem":[3],"of":[4,30,84],"policy":[5,36],"evaluation":[6],"in":[7,63],"discounted":[8],"Markov":[9],"decision":[10],"processes,":[11],"and":[12,27,80],"provide":[13],"instance-dependent":[14,41,92],"guarantees":[15],"on":[16],"$\\ell_\\infty$-error":[18],"under":[19],"a":[20,64],"generative":[21],"model.":[22],"establish":[24],"both":[25],"asymptotic":[26],"non-asymptotic":[28,65],"versions":[29],"local":[31],"minimax":[32],"lower":[33],"bounds":[34],"for":[35],"evaluation,":[37],"thereby":[38],"providing":[39],"an":[40],"baseline":[42],"by":[43,78],"which":[44],"to":[45,95],"compare":[46],"algorithms.":[47],"Theory-inspired":[48],"simulations":[49],"show":[50],"that":[51,88],"widely-used":[53],"temporal":[54],"difference":[55],"(TD)":[56],"algorithm":[57],"is":[58],"strictly":[59],"suboptimal":[60],"when":[61,68],"evaluated":[62],"setting,":[66],"even":[67],"combined":[69],"with":[70],"Polyak-Ruppert":[71],"iterate":[72],"averaging.":[73],"remedy":[75],"this":[76],"issue":[77],"introducing":[79],"analyzing":[81],"variance-reduced":[82],"forms":[83],"stochastic":[85],"approximation,":[86],"showing":[87],"they":[89],"achieve":[90],"non-asymptotic,":[91],"optimality":[93],"up":[94],"logarithmic":[96],"factors.":[97]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
