{"id":"https://openalex.org/W4410226728","doi":"https://doi.org/10.1109/tnnls.2025.3564078","title":"Multistate Temporal Difference Target for Model-Free Reinforcement Learning","display_name":"Multistate Temporal Difference Target for Model-Free Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-09","ids":{"openalex":"https://openalex.org/W4410226728","doi":"https://doi.org/10.1109/tnnls.2025.3564078","pmid":"https://pubmed.ncbi.nlm.nih.gov/40343824"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3564078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3564078","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108936925","display_name":"Wuhao Wang","orcid":"https://orcid.org/0009-0001-5343-5951"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wuhao Wang","raw_affiliation_strings":["School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0001-5343-5951","affiliations":[{"raw_affiliation_string":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370053","display_name":"Zhiyong Chen","orcid":"https://orcid.org/0000-0002-2033-4249"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhiyong Chen","raw_affiliation_strings":["School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-2033-4249","affiliations":[{"raw_affiliation_string":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017296869","display_name":"Lepeng Zhang","orcid":"https://orcid.org/0000-0002-5319-2996"},"institutions":[{"id":"https://openalex.org/I102134673","display_name":"Link\u00f6ping University","ror":"https://ror.org/05ynxx418","country_code":"SE","type":"education","lineage":["https://openalex.org/I102134673"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Lepeng Zhang","raw_affiliation_strings":["Department of Computer and Information Science, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"],"raw_orcid":"https://orcid.org/0000-0002-5319-2996","affiliations":[{"raw_affiliation_string":"Department of Computer and Information Science, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden","institution_ids":["https://openalex.org/I102134673"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04030976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"9","first_page":"16854","last_page":"16863"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.7625834941864014},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7199592590332031},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5113281607627869},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3797956705093384}],"concepts":[{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.7625834941864014},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7199592590332031},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5113281607627869},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3797956705093384}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3564078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3564078","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40343824","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40343824","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5600000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W2089434629","https://openalex.org/W2165131254","https://openalex.org/W2593237273","https://openalex.org/W2761873684","https://openalex.org/W2802441085","https://openalex.org/W2900130428","https://openalex.org/W2904246096","https://openalex.org/W2912748933","https://openalex.org/W2963401755","https://openalex.org/W2971254261","https://openalex.org/W2982316857","https://openalex.org/W3029901419","https://openalex.org/W3041202696","https://openalex.org/W3160355689","https://openalex.org/W4200038771","https://openalex.org/W4313482696","https://openalex.org/W4361029961","https://openalex.org/W4364375123","https://openalex.org/W4380302325","https://openalex.org/W4387171816","https://openalex.org/W4396609072"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W2768629321","https://openalex.org/W4384574988","https://openalex.org/W2130711276","https://openalex.org/W4321463377","https://openalex.org/W3088331655","https://openalex.org/W4308828368"],"abstract_inverted_index":{"Temporal":[0],"difference":[1],"(TD)":[2],"learning":[3,10,122],"is":[4],"a":[5,22,61,75],"fundamental":[6],"technique":[7],"in":[8,96],"reinforcement":[9],"that":[11,55,89,115],"updates":[12],"value":[13,34,43,64],"function":[14,65],"estimates":[15],"for":[16,60],"states":[17,59],"or":[18],"state-action":[19],"pairs":[20],"using":[21],"TD":[23,52,70],"target.":[24],"This":[25],"target":[26,54,120],"represents":[27],"an":[28,49],"improved":[29],"estimate":[30],"of":[31,44,93,134],"the":[32,41,91,118,132],"true":[33],"by":[35],"incorporating":[36],"both":[37],"immediate":[38],"rewards":[39],"and":[40,99,107],"estimated":[42],"subsequent":[45,58,77],"states.":[46],"We":[47],"propose":[48],"enhanced":[50],"multistate":[51],"(MSTD)":[53],"utilizes":[56],"multiple":[57],"more":[62],"accurate":[63],"estimation":[66],"compared":[67,124],"to":[68,125],"traditional":[69,126],"learning,":[71],"which":[72],"relies":[73],"on":[74,80],"single":[76],"state.":[78],"Building":[79],"this":[81],"new":[82],"MSTD":[83,119],"concept,":[84],"we":[85,130],"develop":[86],"actor-critic":[87,109],"algorithms":[88,116],"include":[90],"management":[92],"replay":[94],"buffers":[95],"two":[97],"modes":[98],"integrate":[100],"with":[101,136],"deep":[102],"deterministic":[103],"policy":[104],"optimization":[105],"(DDPG)":[106],"soft":[108],"(SAC).":[110],"Numerical":[111],"experiment":[112],"results":[113],"demonstrate":[114],"employing":[117],"improve":[121],"performance":[123],"methods.":[127],"In":[128],"addition,":[129],"analyze":[131],"convergence":[133],"Q-learning":[135],"MSTD.":[137]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
