{"id":"https://openalex.org/W4410639360","doi":"https://doi.org/10.1109/tpami.2025.3572807","title":"A New Accelerated Off-Policy Stochastic Preconditioned TD(0) Algorithm","display_name":"A New Accelerated Off-Policy Stochastic Preconditioned TD(0) Algorithm","publication_year":2025,"publication_date":"2025-05-23","ids":{"openalex":"https://openalex.org/W4410639360","doi":"https://doi.org/10.1109/tpami.2025.3572807","pmid":"https://pubmed.ncbi.nlm.nih.gov/40408195"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3572807","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3572807","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Weidong Liu","orcid":"https://orcid.org/0000-0002-5449-9180"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weidong Liu","raw_affiliation_strings":["School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-5449-9180","affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiahua Ma","orcid":"https://orcid.org/0009-0007-5932-8720"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahua Ma","raw_affiliation_strings":["School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-5932-8720","affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112199537","display_name":"Xiaojun Mao","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Mao","raw_affiliation_strings":["School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9362-508X","affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017728532","display_name":"Kejie Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kejie Tang","raw_affiliation_strings":["School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical Sciences, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11609522,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"47","issue":"9","first_page":"8088","last_page":"8101"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10046","display_name":"Stability and Control of Uncertain Systems","score":0.9460999965667725,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10046","display_name":"Stability and Control of Uncertain Systems","score":0.9460999965667725,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9223999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6037372350692749},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5689405202865601},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4242629110813141}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6037372350692749},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5689405202865601},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4242629110813141}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3572807","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3572807","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40408195","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40408195","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3383712898","display_name":null,"funder_award_id":"23QA1404600","funder_id":"https://openalex.org/F4320327803","funder_display_name":"Shanghai Rising-Star Program"},{"id":"https://openalex.org/G6988326232","display_name":null,"funder_award_id":"72495121","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7064089128","display_name":null,"funder_award_id":"12371273","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G72841529","display_name":null,"funder_award_id":"12422111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327803","display_name":"Shanghai Rising-Star Program","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1506342804","https://openalex.org/W1646707810","https://openalex.org/W2040738104","https://openalex.org/W2075268401","https://openalex.org/W2086161653","https://openalex.org/W2112264645","https://openalex.org/W2124599560","https://openalex.org/W2129935817","https://openalex.org/W2132351269","https://openalex.org/W2139418546","https://openalex.org/W2145339207","https://openalex.org/W2169982856","https://openalex.org/W2473364827","https://openalex.org/W2885549115","https://openalex.org/W2963433607","https://openalex.org/W2963616027","https://openalex.org/W2964123095","https://openalex.org/W2996964934","https://openalex.org/W3041202696","https://openalex.org/W3118861484","https://openalex.org/W3191746168","https://openalex.org/W6632901617","https://openalex.org/W6640533243","https://openalex.org/W6676320248","https://openalex.org/W6678114464","https://openalex.org/W6684488654","https://openalex.org/W6711807133","https://openalex.org/W6748638692","https://openalex.org/W6750027077","https://openalex.org/W6767092952","https://openalex.org/W6769873871","https://openalex.org/W6780559895","https://openalex.org/W6788375365","https://openalex.org/W6799777419","https://openalex.org/W6846327033","https://openalex.org/W6853116598","https://openalex.org/W6855163719"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"we":[3,51],"consider":[4],"policy":[5,58,65,78],"evaluation":[6,79],"in":[7,45,122],"off-policy":[8,77,126],"reinforcement":[9],"learning":[10],"and":[11,102,125],"propose":[12],"a":[13,34],"novel":[14],"procedure":[15,32,70,119,134],"(Stochastic":[16],"Preconditioned":[17],"Temporal":[18],"Difference":[19],"(SPTD))":[20],"that":[21,80,132],"achieves":[22],"the":[23,39,42,56,63,72,76,82,88,95,99,104,118],"optimal":[24,83,106],"convergence":[25],"rate":[26,84],"under":[27,87],"linear":[28,35],"function":[29],"approximation.":[30],"The":[31,114],"has":[33,81],"computational":[36],"complexity":[37],"of":[38,41,117],"dimension":[40],"feature":[43],"space":[44],"each":[46],"iteration.":[47],"Under":[48],"Markovian":[49],"sampling,":[50],"establish":[52],"finite-sample":[53],"rates":[54],"when":[55],"target":[57],"can":[59],"be":[60],"different":[61],"from":[62],"behavior":[64],"for":[66,75],"data":[67],"generation.":[68],"Our":[69],"is":[71,120],"first":[73,96],"algorithm":[74],"$\\mathcal":[85],"{O}(1/t)$O(1/t)":[86],"mean":[89],"square":[90],"error.":[91],"We":[92],"also":[93],"provide":[94],"result":[97],"on":[98],"asymptotic":[100],"distribution":[101],"give":[103],"nearly":[105],"step":[107],"size":[108],"$\\alpha":[109],"_{t}":[110],"=":[111],"\\mathcal":[112],"{O}(t^{-2/3})$\u03b1t=O(t-2/3).":[113],"numerical":[115,129],"performance":[116],"studied":[121],"both":[123],"on-policy":[124],"settings.":[127],"Extensive":[128],"experiments":[130],"demonstrate":[131],"our":[133],"uniformly":[135],"outperforms":[136],"existing":[137],"methods.":[138]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
