{"id":"https://openalex.org/W2963079702","doi":"https://doi.org/10.1109/allerton.2018.8636075","title":"The Effects of Memory Replay in Reinforcement Learning","display_name":"The Effects of Memory Replay in Reinforcement Learning","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2963079702","doi":"https://doi.org/10.1109/allerton.2018.8636075","mag":"2963079702"},"language":"en","primary_location":{"id":"doi:10.1109/allerton.2018.8636075","is_oa":false,"landing_page_url":"https://doi.org/10.1109/allerton.2018.8636075","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 56th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080351422","display_name":"Ruishan Liu","orcid":"https://orcid.org/0000-0002-7298-0701"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ruishan Liu","raw_affiliation_strings":["Department of Electrical Engineering, Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005779176","display_name":"James Zou","orcid":"https://orcid.org/0000-0001-8880-4764"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Zou","raw_affiliation_strings":["Department of Biomedical Data Science, Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Data Science, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080351422"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":6.9381,"has_fulltext":false,"cited_by_count":110,"citation_normalized_percentile":{"value":0.97495255,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"478","last_page":"485"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8218002319335938},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8091722726821899},{"id":"https://openalex.org/keywords/ode","display_name":"Ode","score":0.6630796194076538},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.6186782717704773},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5415549278259277},{"id":"https://openalex.org/keywords/affect","display_name":"Affect (linguistics)","score":0.5181092619895935},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48808515071868896},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3373035192489624},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.0972541868686676},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07721593976020813},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.0654066801071167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8218002319335938},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8091722726821899},{"id":"https://openalex.org/C34862557","wikidata":"https://www.wikidata.org/wiki/Q178985","display_name":"Ode","level":2,"score":0.6630796194076538},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.6186782717704773},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5415549278259277},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.5181092619895935},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48808515071868896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3373035192489624},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0972541868686676},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07721593976020813},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0654066801071167},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/allerton.2018.8636075","is_oa":false,"landing_page_url":"https://doi.org/10.1109/allerton.2018.8636075","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 56th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W594357522","https://openalex.org/W1491843047","https://openalex.org/W1757796397","https://openalex.org/W1758031947","https://openalex.org/W2048226872","https://openalex.org/W2089434629","https://openalex.org/W2112483970","https://openalex.org/W2120346334","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2160308170","https://openalex.org/W2185087676","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2344023930","https://openalex.org/W2460299708","https://openalex.org/W2586680856","https://openalex.org/W2746553466","https://openalex.org/W2963477884","https://openalex.org/W2964118262","https://openalex.org/W4245108548","https://openalex.org/W4285719527","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6638058698","https://openalex.org/W6677737365","https://openalex.org/W6687681856","https://openalex.org/W6704665273"],"related_works":["https://openalex.org/W2371448224","https://openalex.org/W3134495997","https://openalex.org/W2363680170","https://openalex.org/W2376428685","https://openalex.org/W4256082577","https://openalex.org/W625783435","https://openalex.org/W2048742619","https://openalex.org/W2384641672","https://openalex.org/W2742179975","https://openalex.org/W2362647823"],"abstract_inverted_index":{"Experience":[0],"replay":[1,127],"is":[2,37],"a":[3,71,89,146],"key":[4],"technique":[5],"behind":[6],"many":[7],"recent":[8],"advances":[9],"in":[10,96],"deep":[11],"reinforcement":[12],"learning.":[13,120,131],"Allowing":[14],"the":[15,40,47,86,101,109,129,152],"agent":[16],"to":[17,58],"learn":[18],"from":[19],"earlier":[20],"memories":[21],"can":[22,106],"speed":[23],"up":[24],"learning":[25,53],"and":[26],"break":[27],"undesirable":[28],"temporal":[29],"correlations.":[30],"Despite":[31],"its":[32],"widespread":[33],"application,":[34],"very":[35,98],"little":[36,115],"understood":[38],"about":[39],"properties":[41],"of":[42,49,76,85,103],"experience":[43,79],"replay.":[44,80],"How":[45],"does":[46],"amount":[48,102],"memory":[50,104,116,153],"kept":[51,105],"affect":[52,108],"dynamics?":[54],"Does":[55],"it":[56],"help":[57],"prioritize":[59],"certain":[60],"experiences?":[61],"In":[62],"this":[63,97],"paper,":[64],"we":[65,122,144],"address":[66],"these":[67],"questions":[68],"by":[69],"formulating":[70],"dynamical":[72],"systems":[73],"ODE":[74,87],"model":[75],"Q-learning":[77],"with":[78,141],"We":[81,92,132],"derive":[82],"analytic":[83,136],"solutions":[84,137],"for":[88,149],"simple":[90,99,147],"setting.":[91],"show":[93,133],"that":[94,134],"even":[95],"setting,":[100],"substantially":[107],"agent's":[110,130],"performance-too":[111],"much":[112],"or":[113],"too":[114],"both":[117],"slow":[118],"down":[119],"Moreover,":[121],"characterize":[123],"regimes":[124],"where":[125],"prioritized":[126],"harms":[128],"our":[135],"have":[138],"excellent":[139],"agreement":[140],"experiments.":[142],"Finally,":[143],"propose":[145],"algorithm":[148],"adaptively":[150],"changing":[151],"buffer":[154],"size":[155],"which":[156],"achieves":[157],"consistently":[158],"good":[159],"empirical":[160],"performance.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":22},{"year":2021,"cited_by_count":14},{"year":2020,"cited_by_count":16},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
