{"id":"https://openalex.org/W4399767971","doi":"https://doi.org/10.1109/tcyb.2024.3401014","title":"Efficient Reinforcement Learning With the Novel N-Step Method and V-Network","display_name":"Efficient Reinforcement Learning With the Novel N-Step Method and V-Network","publication_year":2024,"publication_date":"2024-06-18","ids":{"openalex":"https://openalex.org/W4399767971","doi":"https://doi.org/10.1109/tcyb.2024.3401014","pmid":"https://pubmed.ncbi.nlm.nih.gov/38889043"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2024.3401014","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2024.3401014","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100357898","display_name":"Miaomiao Zhang","orcid":"https://orcid.org/0000-0001-9921-6511"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Miaomiao Zhang","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-9921-6511","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450395","display_name":"Shuo Zhang","orcid":"https://orcid.org/0000-0002-9589-5657"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Zhang","raw_affiliation_strings":["Institute of Knowledge Based Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9589-5657","affiliations":[{"raw_affiliation_string":"Institute of Knowledge Based Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xinying Wu","orcid":"https://orcid.org/0009-0005-6039-160X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinying Wu","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-6039-160X","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407656","display_name":"Zhiyi Shi","orcid":"https://orcid.org/0009-0006-6866-784X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyi Shi","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069128351","display_name":"Xiangyang Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162215","display_name":"Naval Aeronautical and Astronautical University","ror":"https://ror.org/02j2yhq26","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210162215"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyang Deng","raw_affiliation_strings":["Department of Electronic Information Engineering, Naval Aeronautical University, Yantai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic Information Engineering, Naval Aeronautical University, Yantai, China","institution_ids":["https://openalex.org/I4210162215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110956301","display_name":"Edmond Q. Wu","orcid":"https://orcid.org/0000-0003-1301-9870"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Edmond Q. Wu","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-1301-9870","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053112608","display_name":"Xin Xu","orcid":"https://orcid.org/0000-0003-3238-745X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Xu","raw_affiliation_strings":["School of Intelligent Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0003-3238-745X","affiliations":[{"raw_affiliation_string":"School of Intelligent Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8328,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87156902,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"54","issue":"10","first_page":"6048","last_page":"6057"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.6431000232696533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.6431000232696533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.5688999891281128,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12546","display_name":"Smart Parking Systems Research","score":0.5270000100135803,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6800172328948975},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.55162513256073},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5222852230072021},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40944862365722656},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.23138469457626343},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.09664416313171387}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6800172328948975},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.55162513256073},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5222852230072021},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40944862365722656},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.23138469457626343},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.09664416313171387}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2024.3401014","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2024.3401014","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:38889043","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38889043","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1266008770","display_name":null,"funder_award_id":"T2325018","funder_id":"https://openalex.org/F4320336125","funder_display_name":"National Science Fund for Distinguished Young Scholars"},{"id":"https://openalex.org/G241012663","display_name":null,"funder_award_id":"62171274","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8335343387","display_name":null,"funder_award_id":"U2241228","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null},{"id":"https://openalex.org/F4320336125","display_name":"National Science Fund for Distinguished Young Scholars","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W2093253120","https://openalex.org/W2158782408","https://openalex.org/W2548277951","https://openalex.org/W2596758708","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2904246096","https://openalex.org/W2923653485","https://openalex.org/W2963267001","https://openalex.org/W2963864421","https://openalex.org/W3000642679","https://openalex.org/W3006670279","https://openalex.org/W3035064526","https://openalex.org/W3041202696","https://openalex.org/W3114551027","https://openalex.org/W3119430426","https://openalex.org/W3130292943","https://openalex.org/W3134939669","https://openalex.org/W3138794164","https://openalex.org/W3170872007","https://openalex.org/W3194194466","https://openalex.org/W3195968524","https://openalex.org/W3196755144","https://openalex.org/W3199468096","https://openalex.org/W3207654079","https://openalex.org/W3211437739","https://openalex.org/W3212087381","https://openalex.org/W4224220194","https://openalex.org/W4226056984","https://openalex.org/W4226286144","https://openalex.org/W4226493407","https://openalex.org/W4281480521","https://openalex.org/W4286253093","https://openalex.org/W4298857966","https://openalex.org/W4302552086","https://openalex.org/W4320919539","https://openalex.org/W4321021825","https://openalex.org/W4366736145","https://openalex.org/W4382119038","https://openalex.org/W4388052875","https://openalex.org/W6637967152","https://openalex.org/W6683195989","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6713603661","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6759918608","https://openalex.org/W6784938585","https://openalex.org/W6788575936","https://openalex.org/W6809504054","https://openalex.org/W6838701874","https://openalex.org/W6851277581"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"The":[0,133],"application":[1],"of":[2,25,32,53,68,82,87,118,135,143],"reinforcement":[3],"learning":[4],"(RL)":[5],"in":[6,71,94,103,151,157],"artificial":[7],"intelligence":[8],"has":[9,130],"become":[10],"increasingly":[11],"widespread.":[12],"However,":[13],"its":[14],"drawbacks":[15],"are":[16],"also":[17],"apparent,":[18],"as":[19],"it":[20,57],"requires":[21],"a":[22,35,44,122],"large":[23],"number":[24],"samples":[26],"for":[27],"support,":[28],"making":[29],"the":[30,51,54,66,79,88,115,127,141,165,187],"enhancement":[31],"sample":[33,145],"efficiency":[34,146],"research":[36],"focus.":[37],"To":[38,113],"address":[39],"this":[40,74],"issue,":[41],"we":[42,120],"propose":[43],"novel":[45,167],"N-step":[46,75,168],"method.":[47],"This":[48],"method":[49,76,124],"extends":[50],"horizon":[52],"agent,":[55],"enabling":[56],"to":[58,91,110],"acquire":[59],"more":[60],"long-term":[61],"effective":[62],"information,":[63],"thus":[64],"resolving":[65],"issue":[67],"data":[69],"inefficiency":[70],"RL.":[72,152],"Additionally,":[73],"can":[77],"reduce":[78],"estimation":[80,92,101,105,111,116,150],"variance":[81],"Q-function,":[83,119],"which":[84,129],"is":[85,106,183],"one":[86],"factors":[89],"contributing":[90],"errors":[93],"Q-function":[95,104,149],"estimation.":[96],"Apart":[97],"from":[98],"high":[99],"variance,":[100],"bias":[102,117],"another":[107],"factor":[108],"leading":[109],"errors.":[112],"mitigate":[114],"design":[121],"regularization":[123],"based":[125],"on":[126],"V-function,":[128],"been":[131],"underexplored.":[132],"combination":[134],"these":[136],"two":[137],"methods":[138],"perfectly":[139],"addresses":[140],"problems":[142],"low":[144],"and":[147,159,180],"inaccurate":[148],"Finally,":[153],"extensive":[154],"experiments":[155],"conducted":[156],"discrete":[158],"continuous":[160],"action":[161],"spaces":[162],"demonstrate":[163],"that":[164],"proposed":[166],"method,":[169],"when":[170],"combined":[171],"with":[172],"classical":[173,188],"deep":[174,176],"Q-network,":[175],"deterministic":[177],"policy":[178],"gradient,":[179],"TD3":[181],"algorithms,":[182],"effective,":[184],"consistently":[185],"outperforming":[186],"algorithms.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
