{"id":"https://openalex.org/W4392667113","doi":"https://doi.org/10.1109/tetci.2024.3369641","title":"Data Efficient Deep Reinforcement Learning With Action-Ranked Temporal Difference Learning","display_name":"Data Efficient Deep Reinforcement Learning With Action-Ranked Temporal Difference Learning","publication_year":2024,"publication_date":"2024-03-11","ids":{"openalex":"https://openalex.org/W4392667113","doi":"https://doi.org/10.1109/tetci.2024.3369641"},"language":"en","primary_location":{"id":"doi:10.1109/tetci.2024.3369641","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3369641","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100453159","display_name":"Qi Liu","orcid":"https://orcid.org/0000-0001-7485-6344"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qi Liu","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100677178","display_name":"Yanjie Li","orcid":"https://orcid.org/0000-0001-7890-9677"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjie Li","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064580686","display_name":"Yuecheng Liu","orcid":"https://orcid.org/0000-0002-3683-7959"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuecheng Liu","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055451322","display_name":"Ke Lin","orcid":"https://orcid.org/0000-0002-3429-5877"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Lin","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054634605","display_name":"Jianqi Gao","orcid":"https://orcid.org/0000-0003-4486-3740"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqi Gao","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087749601","display_name":"Yunjiang Lou","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunjiang Lou","raw_affiliation_strings":["Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100453159"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":2.4326,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.89735909,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"8","issue":"4","first_page":"2949","last_page":"2961"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7129451036453247},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6121201515197754},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5691184997558594},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5319942235946655},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47768551111221313},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45169636607170105},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.353421688079834},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.27480822801589966},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.12155786156654358}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7129451036453247},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6121201515197754},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5691184997558594},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5319942235946655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47768551111221313},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45169636607170105},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.353421688079834},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.27480822801589966},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.12155786156654358},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetci.2024.3369641","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3369641","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G1137392626","display_name":null,"funder_award_id":"JCYJ20220818102415033","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"},{"id":"https://openalex.org/G1665521743","display_name":null,"funder_award_id":"61977019","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G228230819","display_name":null,"funder_award_id":"JSGG20201103093802006","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"},{"id":"https://openalex.org/G2636664055","display_name":null,"funder_award_id":"JCYJ20180507183837726","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"},{"id":"https://openalex.org/G7386022970","display_name":null,"funder_award_id":"U1813206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2161966552","https://openalex.org/W2292533394","https://openalex.org/W2294827575","https://openalex.org/W2740377041","https://openalex.org/W2746553466","https://openalex.org/W2963523627","https://openalex.org/W2963658727","https://openalex.org/W3038822267","https://openalex.org/W3170533301","https://openalex.org/W3192510920","https://openalex.org/W4311554030","https://openalex.org/W4322729780","https://openalex.org/W4378190882","https://openalex.org/W6627932998","https://openalex.org/W6677067356","https://openalex.org/W6683603353","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6730641667","https://openalex.org/W6740092555","https://openalex.org/W6742529797","https://openalex.org/W6747473740","https://openalex.org/W6749304979","https://openalex.org/W6752380930","https://openalex.org/W6780470247","https://openalex.org/W6781750019","https://openalex.org/W6796524807","https://openalex.org/W6797064975","https://openalex.org/W6803519892","https://openalex.org/W6847317037"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W4384574988","https://openalex.org/W2768629321","https://openalex.org/W2130711276","https://openalex.org/W4308828368","https://openalex.org/W1528400370"],"abstract_inverted_index":{"In":[0,36],"value-based":[1,157],"deep":[2,97,158,185],"reinforcement":[3],"learning":[4,17,90],"(RL),":[5],"value":[6,34,63,113,146],"function":[7],"approximation":[8],"errors":[9],"lead":[10],"to":[11,25,42,92,105,154,183],"suboptimal":[12],"policies.":[13],"Temporal":[14],"difference":[15],"(TD)":[16],"is":[18,40],"one":[19],"of":[20,49,68,79,96,108,140,173,205],"the":[21,65,76,94,106,116,138,141,151,165,180,190],"most":[22],"important":[23],"methodologies":[24],"approximate":[26],"state-action":[27,111],"(":[28],"<inline-formula":[29,44,59,69,80,117,142],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[30,45,60,70,81,118,143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[31,46,61,71,82,119,144],"notation=\"LaTeX\">$Q$</tex-math></inline-formula>":[32,47,62,72,83,120,145],")":[33],"function.":[35],"TD":[37,89,102,135,194,201],"learning,":[38,136,195],"it":[39],"critical":[41],"estimate":[43],"values":[48,121,133],"greedy":[50],"actions":[51],"more":[52,56,130,147],"accurately":[53],"because":[54],"a":[55,123,155,198],"accurate":[57,131],"target":[58,132],"enhances":[64],"estimation":[66,77,139],"accuracy":[67,78],"value.":[73],"To":[74,187],"improve":[75],"value,":[84],"we":[85,178,196],"propose":[86,197],"an":[87],"action-ranked":[88,193,200],"method":[91,127,153,167,182],"enhance":[93],"performance":[95],"RL":[98,159],"by":[99],"weighting":[100],"each":[101],"error":[103],"according":[104],"rank":[107],"its":[109],"corresponding":[110],"pair's":[112],"among":[114],"all":[115],"on":[122,170,214],"state.":[124],"The":[125],"proposed":[126,152,166,181],"can":[128],"provide":[129],"for":[134],"making":[137],"accurate.":[148],"We":[149],"apply":[150],"representative":[156],"algorithm,":[160],"and":[161,218],"results":[162],"show":[163],"that":[164,209],"outperforms":[168],"baselines":[169,213],"31":[171],"out":[172],"40":[174],"Atari":[175,215],"games.":[176],"Furthermore,":[177],"extend":[179],"multi-agent":[184],"RL.":[186],"adaptively":[188],"determine":[189],"hyperparameter":[191],"in":[192],"meta":[199],"learning.":[202],"A":[203],"series":[204],"experiments":[206],"quantitatively":[207],"verify":[208],"our":[210],"methods":[211],"outperform":[212],"games,":[216],"StarCraft-II,":[217],"Grid":[219],"World":[220],"environments.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2025-10-10T00:00:00"}
