{"id":"https://openalex.org/W4411688074","doi":"https://doi.org/10.1109/tnnls.2025.3581217","title":"Neighboring State-Aware Policy for Deep Reinforcement Learning","display_name":"Neighboring State-Aware Policy for Deep Reinforcement Learning","publication_year":2025,"publication_date":"2025-06-26","ids":{"openalex":"https://openalex.org/W4411688074","doi":"https://doi.org/10.1109/tnnls.2025.3581217","pmid":"https://pubmed.ncbi.nlm.nih.gov/40569805"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3581217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3581217","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100771701","display_name":"Meng Xu","orcid":"https://orcid.org/0000-0003-4857-5439"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Meng Xu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-4857-5439","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071933192","display_name":"Xinhong Chen","orcid":"https://orcid.org/0000-0002-8563-148X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xinhong Chen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-8563-148X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628454","display_name":"Guanyi Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Guanyi Zhao","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101700401","display_name":"Zihao Wen","orcid":"https://orcid.org/0000-0002-5401-5393"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zihao Wen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111335760","display_name":"Weiwei Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Weiwei Fu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100356291","display_name":"Jianping Wang","orcid":"https://orcid.org/0000-0002-9318-1482"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jianping Wang","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-9318-1482","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7588,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87186241,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"36","issue":"10","first_page":"18188","last_page":"18202"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2581999897956848,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2581999897956848,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8315799832344055},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6511847972869873},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5039793848991394},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47237303853034973},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3269615173339844},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.321863055229187},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2870342433452606},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.13697782158851624}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8315799832344055},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6511847972869873},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5039793848991394},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47237303853034973},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3269615173339844},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.321863055229187},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2870342433452606},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.13697782158851624},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3581217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3581217","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40569805","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40569805","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7643524006","display_name":null,"funder_award_id":"R5060-19","funder_id":"https://openalex.org/F4320322887","funder_display_name":"The Research Council"}],"funders":[{"id":"https://openalex.org/F4320322887","display_name":"The Research Council","ror":"https://ror.org/03tcppy59"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1988245217","https://openalex.org/W2137966130","https://openalex.org/W2145339207","https://openalex.org/W2950226226","https://openalex.org/W2997070234","https://openalex.org/W3127561923","https://openalex.org/W3135380146","https://openalex.org/W3152878473","https://openalex.org/W3207250575","https://openalex.org/W4200140111","https://openalex.org/W4280508537","https://openalex.org/W4308211465","https://openalex.org/W4309835728","https://openalex.org/W4312605311","https://openalex.org/W4315784722","https://openalex.org/W4382318178","https://openalex.org/W4386607612","https://openalex.org/W4388052812","https://openalex.org/W4388574051","https://openalex.org/W4391559291","https://openalex.org/W4391667216","https://openalex.org/W4391795801","https://openalex.org/W4391974559","https://openalex.org/W4392347372","https://openalex.org/W4396506409","https://openalex.org/W4399541205","https://openalex.org/W4399767971","https://openalex.org/W4401452744","https://openalex.org/W4402450920","https://openalex.org/W4409659344"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2],"(DRL)":[3],"methods,":[4],"which":[5],"train":[6],"a":[7,18,29,91,102],"policy":[8,41,94,169],"to":[9,16,37,60,81,135,138,155,167],"obtain":[10],"the":[11,34,39,44,62,65,75,107,123,129,136,143,160],"sequence":[12,105],"of":[13,64,176],"actions":[14],"required":[15],"complete":[17],"task,":[19],"have":[20],"achieved":[21],"remarkable":[22],"success":[23],"across":[24,192],"diverse":[25],"applications.":[26],"It":[27],"is":[28,153],"long-standing":[30],"open":[31],"issue":[32],"in":[33,106],"DRL":[35,70,98,190],"community":[36],"make":[38],"trained":[40],"gradually":[42],"approach":[43,112,178],"theoretically":[45],"globally":[46],"optimal":[47],"policy,":[48],"and":[49,83,116,119,132,158,179],"existing":[50,97],"research":[51],"has":[52],"also":[53],"explored":[54],"several":[55],"challenges,":[56],"such":[57],"as":[58,122],"exploration-exploitation,":[59],"improve":[61],"quality":[63],"obtained":[66],"policy.":[67],"However,":[68],"most":[69],"methods":[71,99,191],"rely":[72],"solely":[73],"on":[74,196],"current":[76,130],"state":[77,104,125,164],"for":[78],"decision-making,":[79],"leading":[80,166],"short-sightedness":[82],"suboptimal":[84],"learning.":[85,170],"To":[86],"overcome":[87],"this,":[88],"we":[89],"propose":[90],"neighboring":[92,103,124,151],"state-aware":[93],"that":[95,184],"enhances":[96,187],"by":[100,150],"incorporating":[101],"decision-making":[108,157],"process.":[109,145],"Specifically,":[110],"our":[111,177],"saves":[113],"multiple":[114],"past":[115],"future":[117],"states":[118],"concatenates":[120],"them":[121,134],"sequence,":[126],"along":[127],"with":[128],"state,":[131],"inputs":[133],"actor":[137],"generate":[139],"an":[140],"action":[141],"during":[142],"training":[144],"This":[146],"global":[147],"perspective,":[148],"provided":[149],"states,":[152],"similar":[154],"human":[156],"helps":[159],"agent":[161],"better":[162],"understand":[163],"evolution,":[165],"improved":[168],"We":[171],"present":[172],"two":[173],"specific":[174],"implementations":[175],"demonstrate":[180],"through":[181],"extensive":[182],"experiments":[183],"it":[185],"effectively":[186],"ten":[188],"representative":[189],"nine":[193],"tasks,":[194],"based":[195],"three":[197],"metrics,":[198],"including":[199],"return.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
