{"id":"https://openalex.org/W3119408560","doi":"https://doi.org/10.1109/tsmc.2020.3043584","title":"Online Sparse Temporal Difference Learning Based on Nested Optimization and Regularized Dual Averaging","display_name":"Online Sparse Temporal Difference Learning Based on Nested Optimization and Regularized Dual Averaging","publication_year":2021,"publication_date":"2021-01-13","ids":{"openalex":"https://openalex.org/W3119408560","doi":"https://doi.org/10.1109/tsmc.2020.3043584","mag":"3119408560"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2020.3043584","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2020.3043584","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102884128","display_name":"Tianheng Song","orcid":"https://orcid.org/0000-0001-8275-5535"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianheng Song","raw_affiliation_strings":["Department of Automation, Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064212043","display_name":"Dazi Li","orcid":"https://orcid.org/0000-0003-1610-6558"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dazi Li","raw_affiliation_strings":["Department of Automation, Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053112608","display_name":"Xin Xu","orcid":"https://orcid.org/0000-0003-3238-745X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Xu","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102884128"],"corresponding_institution_ids":["https://openalex.org/I75390827"],"apc_list":null,"apc_paid":null,"fwci":0.4199,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.67248506,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"52","issue":"4","first_page":"2042","last_page":"2052"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.582381546497345},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5387428998947144},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.5228031873703003},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5057246685028076},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5046948194503784},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.4828808605670929},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45076119899749756},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4444875419139862},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.44238734245300293},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4158293604850769},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.412239670753479},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41060972213745117},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.31995686888694763},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.16746783256530762},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.1143370270729065}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.582381546497345},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5387428998947144},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.5228031873703003},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5057246685028076},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5046948194503784},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.4828808605670929},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45076119899749756},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4444875419139862},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44238734245300293},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4158293604850769},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.412239670753479},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41060972213745117},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.31995686888694763},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16746783256530762},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.1143370270729065},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2020.3043584","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2020.3043584","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5099999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G336691117","display_name":null,"funder_award_id":"61873022","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3469485582","display_name":null,"funder_award_id":"4182045","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G5196624177","display_name":null,"funder_award_id":"61825305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7457541925","display_name":null,"funder_award_id":"61751311","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1460549585","https://openalex.org/W1646707810","https://openalex.org/W1941445455","https://openalex.org/W2046513829","https://openalex.org/W2071983464","https://openalex.org/W2075268401","https://openalex.org/W2088542838","https://openalex.org/W2102982586","https://openalex.org/W2104753538","https://openalex.org/W2112264645","https://openalex.org/W2118556122","https://openalex.org/W2121703796","https://openalex.org/W2145339207","https://openalex.org/W2169982856","https://openalex.org/W2177967097","https://openalex.org/W2188393917","https://openalex.org/W2259258048","https://openalex.org/W2294797721","https://openalex.org/W2338351427","https://openalex.org/W2345097924","https://openalex.org/W2625332229","https://openalex.org/W2793683455","https://openalex.org/W2891654323","https://openalex.org/W2911573951","https://openalex.org/W2953189830","https://openalex.org/W2963761387","https://openalex.org/W2968805005","https://openalex.org/W3017285694","https://openalex.org/W3100789280","https://openalex.org/W4214717370","https://openalex.org/W4242606736","https://openalex.org/W4292363360","https://openalex.org/W6676641278","https://openalex.org/W6677089629","https://openalex.org/W6677370284","https://openalex.org/W6677984395","https://openalex.org/W6679599195","https://openalex.org/W6683356630","https://openalex.org/W6686770810","https://openalex.org/W6738577479","https://openalex.org/W6738706147","https://openalex.org/W7065010408"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W4297676672","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4281702477","https://openalex.org/W2490526372","https://openalex.org/W4376166922","https://openalex.org/W3155717344","https://openalex.org/W1770458422"],"abstract_inverted_index":{"In":[0,62],"policy":[1,74],"evaluation":[2,75],"of":[3,30,94,162,166],"reinforcement":[4],"learning":[5,11,60,71,163],"tasks,":[6],"the":[7,43,95,125,146,160,167],"temporal":[8],"difference":[9],"(TD)":[10],"with":[12,100,114,156],"value":[13,31],"function":[14,32,107],"approximation":[15,33],"has":[16,23],"been":[17],"widely":[18],"studied.":[19],"However,":[20],"feature":[21,44,87,103],"representation":[22],"a":[24,111],"decisive":[25],"influence":[26],"on":[27,154],"both":[28],"accuracy":[29,57],"and":[34,47,54,124,136,164],"convergence":[35,143],"rate.":[36],"Therefore,":[37],"it":[38],"is":[39,98,108],"important":[40],"to":[41,102],"develop":[42],"selection":[45],"theory":[46],"methods":[48],"that":[49],"can":[50],"efficiently":[51],"prevent":[52],"overfitting":[53],"improve":[55],"estimation":[56],"in":[58],"TD":[59,70],"algorithms.":[61,169],"this":[63],"article,":[64],"we":[65],"propose":[66],"an":[67],"online":[68],"sparse":[69],"algorithm":[72,97],"for":[73,86,145],"by":[76,131],"using":[77],"<inline-formula":[78,115],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[79,116],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[80,117],"<tex-math":[81,118],"notation=\"LaTeX\">$\\ell":[82,119],"_{1}$":[83,120],"</tex-math></inline-formula>":[84,121],"-regualrization":[85],"selection.":[88],"The":[89,105,142,152],"per-step-time":[90],"runtime":[91],"computational":[92],"complexity":[93],"proposed":[96,168],"linear":[99],"respect":[101],"dimension.":[104],"loss":[106],"defined":[109],"as":[110],"nested":[112],"optimization":[113],"-regularization":[122],"penalty,":[123],"solver":[126],"minimizes":[127],"two":[128],"suboptimization":[129],"problems":[130],"running":[132],"stochastic":[133],"gradient":[134],"descent":[135],"regularized":[137],"dual":[138],"averaging":[139],"method,":[140],"alternately.":[141],"results":[144],"fixed":[147],"points":[148],"are":[149],"also":[150],"established.":[151],"experiments":[153],"benchmarks":[155],"high-dimensional":[157],"features":[158],"show":[159],"abilities":[161],"generalization":[165]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
