{"id":"https://openalex.org/W3019142787","doi":"https://doi.org/10.1109/tnnls.2020.2981377","title":"Actor\u2013Critic Learning Control With Regularization and Feature Selection in Policy Gradient Estimation","display_name":"Actor\u2013Critic Learning Control With Regularization and Feature Selection in Policy Gradient Estimation","publication_year":2020,"publication_date":"2020-04-21","ids":{"openalex":"https://openalex.org/W3019142787","doi":"https://doi.org/10.1109/tnnls.2020.2981377","mag":"3019142787","pmid":"https://pubmed.ncbi.nlm.nih.gov/32324571"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2020.2981377","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2020.2981377","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077743639","display_name":"Luntong Li","orcid":"https://orcid.org/0000-0002-5299-4858"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Luntong Li","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064212043","display_name":"Dazi Li","orcid":"https://orcid.org/0000-0003-1610-6558"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dazi Li","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102884128","display_name":"Tianheng Song","orcid":"https://orcid.org/0000-0001-8275-5535"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianheng Song","raw_affiliation_strings":["Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053112608","display_name":"Xin Xu","orcid":"https://orcid.org/0000-0003-3238-745X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Xu","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5077743639"],"corresponding_institution_ids":["https://openalex.org/I75390827"],"apc_list":null,"apc_paid":null,"fwci":1.3718,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.85078057,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"32","issue":"3","first_page":"1217","last_page":"1227"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10821","display_name":"Cardiovascular Function and Risk Factors","score":0.9448000192642212,"subfield":{"id":"https://openalex.org/subfields/2705","display_name":"Cardiology and Cardiovascular Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.7648853063583374},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7505090236663818},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6262136697769165},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5342345237731934},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5065470933914185},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5053034424781799},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47055935859680176},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.4399799704551697},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3805255889892578},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33494454622268677},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2841593325138092}],"concepts":[{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.7648853063583374},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7505090236663818},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6262136697769165},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5342345237731934},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5065470933914185},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5053034424781799},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47055935859680176},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.4399799704551697},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3805255889892578},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33494454622268677},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2841593325138092},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2020.2981377","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2020.2981377","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:32324571","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/32324571","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G336691117","display_name":null,"funder_award_id":"61873022","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3469485582","display_name":null,"funder_award_id":"4182045","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G4793972535","display_name":null,"funder_award_id":"XK1802-4","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5196624177","display_name":null,"funder_award_id":"61825305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7457541925","display_name":null,"funder_award_id":"61751311","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W359568995","https://openalex.org/W594357522","https://openalex.org/W1523985187","https://openalex.org/W1524324710","https://openalex.org/W1980183459","https://openalex.org/W2000373737","https://openalex.org/W2009303086","https://openalex.org/W2009702064","https://openalex.org/W2033736734","https://openalex.org/W2063123975","https://openalex.org/W2073384958","https://openalex.org/W2094387729","https://openalex.org/W2096199223","https://openalex.org/W2106261932","https://openalex.org/W2112264645","https://openalex.org/W2113455337","https://openalex.org/W2118556122","https://openalex.org/W2121863487","https://openalex.org/W2139418546","https://openalex.org/W2145339207","https://openalex.org/W2151161180","https://openalex.org/W2155007355","https://openalex.org/W2155027007","https://openalex.org/W2165150801","https://openalex.org/W2169207653","https://openalex.org/W2169982856","https://openalex.org/W2172968643","https://openalex.org/W2173248099","https://openalex.org/W2509062068","https://openalex.org/W2586680856","https://openalex.org/W2735519198","https://openalex.org/W2796202072","https://openalex.org/W2950471160","https://openalex.org/W2963864421","https://openalex.org/W2964161785","https://openalex.org/W2964326866","https://openalex.org/W3017285694","https://openalex.org/W3102961917","https://openalex.org/W3104999911","https://openalex.org/W4211221179","https://openalex.org/W4243772471","https://openalex.org/W4249667877","https://openalex.org/W4285719527","https://openalex.org/W4301283118","https://openalex.org/W4302570325","https://openalex.org/W6612245137","https://openalex.org/W6676072908","https://openalex.org/W6677089629","https://openalex.org/W6682849425","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6696324988","https://openalex.org/W6740646713","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W4386564352","https://openalex.org/W2952668426"],"abstract_inverted_index":{"Actor-critic":[0],"(AC)":[1],"learning":[2,14,25,41,55],"control":[3,56],"architecture":[4],"has":[5],"been":[6,33],"regarded":[7],"as":[8],"an":[9],"important":[10],"framework":[11],"for":[12,63],"reinforcement":[13],"(RL)":[15],"with":[16,58,223],"continuous":[17],"states":[18],"and":[19,27,39,60,123,145,163,185,198,208],"actions.":[20],"In":[21,47,91],"order":[22],"to":[23,36,84],"improve":[24],"efficiency":[26],"convergence":[28,168],"property,":[29],"previous":[30],"works":[31],"have":[32],"mainly":[34],"devoted":[35],"solve":[37],"regularization":[38,59],"feature":[40,61,89,192],"problem":[42,108],"in":[43,67,195,206],"the":[44,68,81,86,99,115,119,124,127,139,142,148,170,177,196,203],"policy":[45,64,94,121,132,152],"evaluation.":[46],"this":[48],"article,":[49],"we":[50,146],"propose":[51],"a":[52,106],"novel":[53],"AC":[54,216],"method":[57],"selection":[62,193],"gradient":[65,153,155],"estimation":[66],"actor":[69,82,197,204],"network.":[70],"The":[71,157,167,183],"main":[72],"contribution":[73],"is":[74,78,114,126,173],"that":[75,109,189],"l<sub>1</sub>":[76,128],"-regularization":[77,129],"used":[79],"on":[80,176,218],"network":[83],"achieve":[85],"function":[87],"of":[88,118,131,141,151,169,179,202],"selection.":[90],"each":[92],"iteration,":[93],"parameters":[95],"are":[96],"updated":[97],"by":[98],"regularized":[100],"dual-averaging":[101],"(RDA)":[102],"technique,":[103],"which":[104],"solves":[105],"minimization":[107,143],"involves":[110],"two":[111],"terms:":[112],"one":[113],"running":[116],"average":[117],"past":[120],"gradients":[122],"other":[125],"term":[130],"parameters.":[133],"Our":[134],"algorithm":[135,172],"can":[136,160],"efficiently":[137],"calculate":[138],"solution":[140],"problem,":[144],"call":[147],"new":[149],"adaptation":[150],"RDA-policy":[154],"(RDA-PG).":[156],"proposed":[158,171],"RDA-PG":[159,190,211],"learn":[161],"stochastic":[162,181,207],"deterministic":[164,209],"near-optimal":[165],"policies.":[166],"established":[174],"based":[175],"theory":[178],"two-timescale":[180],"approximation.":[182],"simulation":[184],"experimental":[186],"results":[187],"show":[188],"performs":[191,212],"successfully":[194],"learns":[199],"sparse":[200],"representations":[201],"both":[205],"cases.":[210],"better":[213],"than":[214],"existing":[215],"algorithms":[217],"standard":[219],"RL":[220],"benchmark":[221],"problems":[222],"irrelevant":[224],"features":[225],"or":[226],"redundant":[227],"features.":[228]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
