{"id":"https://openalex.org/W4394804995","doi":"https://doi.org/10.1109/tnnls.2024.3378913","title":"Boosting On-Policy Actor\u2013Critic With Shallow Updates in Critic","display_name":"Boosting On-Policy Actor\u2013Critic With Shallow Updates in Critic","publication_year":2024,"publication_date":"2024-04-15","ids":{"openalex":"https://openalex.org/W4394804995","doi":"https://doi.org/10.1109/tnnls.2024.3378913","pmid":"https://pubmed.ncbi.nlm.nih.gov/38619961"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3378913","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3378913","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077743639","display_name":"Luntong Li","orcid":"https://orcid.org/0000-0002-5299-4858"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Luntong Li","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080193690","display_name":"Yuanheng Zhu","orcid":"https://orcid.org/0000-0001-5384-423X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanheng Zhu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, State Key Laboratory of Multimodal Artificial Intelligence Systems, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5077743639"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.3475,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62077314,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"3","first_page":"5644","last_page":"5653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9018884897232056},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.7150057554244995},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6381427049636841},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6197414398193359},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.6076790690422058},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5958343148231506},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5933929681777954},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5795391201972961},{"id":"https://openalex.org/keywords/trust-region","display_name":"Trust region","score":0.5795093774795532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49303504824638367},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4893779158592224},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.433419793844223},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.41409456729888916},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36048510670661926},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.229917973279953},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.12203282117843628}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9018884897232056},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.7150057554244995},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6381427049636841},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6197414398193359},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.6076790690422058},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5958343148231506},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5933929681777954},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5795391201972961},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.5795093774795532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49303504824638367},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4893779158592224},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.433419793844223},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.41409456729888916},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36048510670661926},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.229917973279953},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.12203282117843628},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C178635117","wikidata":"https://www.wikidata.org/wiki/Q747499","display_name":"RADIUS","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3378913","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3378913","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38619961","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38619961","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6254270691","display_name":null,"funder_award_id":"62206281","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8179479325","display_name":null,"funder_award_id":"62293541","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W2072931156","https://openalex.org/W2075268401","https://openalex.org/W2094387729","https://openalex.org/W2106261932","https://openalex.org/W2145339207","https://openalex.org/W2493977830","https://openalex.org/W2586680856","https://openalex.org/W2912681837","https://openalex.org/W2919115771","https://openalex.org/W2923534560","https://openalex.org/W2963890729","https://openalex.org/W2966477753","https://openalex.org/W2989847975","https://openalex.org/W3046093665","https://openalex.org/W3089963978","https://openalex.org/W3110979110","https://openalex.org/W3118210634","https://openalex.org/W3147214434","https://openalex.org/W3175768999","https://openalex.org/W3207294509","https://openalex.org/W3207544809","https://openalex.org/W3207654079","https://openalex.org/W3209616537","https://openalex.org/W3210169707","https://openalex.org/W4200635426","https://openalex.org/W4205272462","https://openalex.org/W4214717370","https://openalex.org/W4221154894","https://openalex.org/W4243772471","https://openalex.org/W4252279978","https://openalex.org/W4283031789","https://openalex.org/W4299401133","https://openalex.org/W4310705874","https://openalex.org/W4392387995","https://openalex.org/W6612245137","https://openalex.org/W6638018090","https://openalex.org/W6683204974","https://openalex.org/W6738245236","https://openalex.org/W6748638692","https://openalex.org/W6758074843","https://openalex.org/W6771750343","https://openalex.org/W6776867236","https://openalex.org/W6777944346","https://openalex.org/W6783140480","https://openalex.org/W6784221857","https://openalex.org/W6793042568","https://openalex.org/W6809190652","https://openalex.org/W6838968391"],"related_works":["https://openalex.org/W4385488867","https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2386410636","https://openalex.org/W2341346307","https://openalex.org/W2025663273","https://openalex.org/W2154399718","https://openalex.org/W3099153698","https://openalex.org/W2768629321","https://openalex.org/W3038962357"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,26,58],"learning":[2,23,27,59],"(DRL)":[3],"benefits":[4,29],"from":[5,30,151],"the":[6,16,22,66,101,111,114,138,149,152,155,162,173,180,194,203,207,215,219,231],"representation":[7,38,93,108],"power":[8],"of":[9,68,100,107,113,193,211,233,236],"deep":[10,48,98],"neural":[11],"networks":[12],"(NNs),":[13],"to":[14,64,76,136,146,175,206,218],"approximate":[15],"value":[17,84,90],"function":[18,85],"and":[19,33,39,83,92,117,214,242],"policy":[20,49,81,119,130,174,240,244],"in":[21,168,182,190],"process.":[24],"Batch":[25],"(BRL)":[28],"stable":[31],"training":[32],"data":[34],"efficiency":[35,235],"with":[36,61,121],"fixed":[37],"enjoys":[40],"solid":[41],"theoretical":[42],"analysis.":[43],"This":[44],"work":[45],"proposes":[46],"least-squares":[47,57,123],"gradient":[50,131,245],"(LSDPG),":[51],"a":[52,73,104,169,187],"hybrid":[53],"approach":[54],"that":[55],"combines":[56],"(RL)":[60],"online":[62],"DRL":[63,159],"achieve":[65],"best":[67],"both":[69],"worlds.":[70],"LSDPG":[71,87,96,183,237],"leverages":[72],"shared":[74],"network":[75],"share":[77],"useful":[78],"features":[79,150],"between":[80],"(actor)":[82],"(critic).":[86],"learns":[88],"policy,":[89,213],"function,":[91],"separately.":[94],"First,":[95],"views":[97],"NNs":[99],"critic":[102,153,163,181,195,204],"as":[103],"linear":[105],"combination":[106],"weighted":[109],"by":[110],"weights":[112],"last":[115],"layer":[116],"performs":[118],"evaluation":[120],"regularized":[122,208],"temporal":[124],"difference":[125],"(LSTD)":[126],"methods.":[127],"Second,":[128],"arbitrary":[129],"algorithms":[132,164],"can":[133],"be":[134,176],"applied":[135],"improve":[137],"policy.":[139,222],"Third,":[140],"an":[141],"auxiliary":[142],"task":[143],"is":[144,178,184],"used":[145,167],"periodically":[147],"distill":[148],"into":[154],"representation.":[156],"Unlike":[157],"most":[158],"methods,":[160],"where":[161],"are":[165],"often":[166],"nonstationary":[170],"situation,":[171],"i.e.,":[172],"evaluated":[177],"changing,":[179],"working":[185],"on":[186,226],"stationary":[188],"case":[189],"each":[191],"iteration":[192],"update.":[196],"We":[197],"prove":[198],"that,":[199],"under":[200],"some":[201],"conditions,":[202],"converges":[205,217],"TD":[209],"fixpoint":[210],"current":[212],"actor":[216],"local":[220],"optimal":[221],"The":[223],"experimental":[224],"results":[225],"challenging":[227],"Procgen":[228],"benchmark":[229],"illustrate":[230],"improvement":[232],"sample":[234],"over":[238],"proximal":[239],"optimization":[241],"phasic":[243],"(PPG).":[246]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
