{"id":"https://openalex.org/W2582946978","doi":"https://doi.org/10.1109/ssci.2016.7849368","title":"Q-learning with experience replay in a dynamic environment","display_name":"Q-learning with experience replay in a dynamic environment","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2582946978","doi":"https://doi.org/10.1109/ssci.2016.7849368","mag":"2582946978"},"language":"en","primary_location":{"id":"doi:10.1109/ssci.2016.7849368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci.2016.7849368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.rug.nl/en/publications/a5ed743f-4597-4e0a-a2da-21fdba5b6a72","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061783986","display_name":"Mathijs Pieters","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Mathijs Pieters","raw_affiliation_strings":["Institute of Artificial Intelligence and Cognitive Engineering, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Cognitive Engineering, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060596453","display_name":"Marco Wiering","orcid":null},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Marco A. Wiering","raw_affiliation_strings":["Institute of Artificial Intelligence and Cognitive Engineering, University of Groningen, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Cognitive Engineering, University of Groningen, The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061783986"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":2.9993,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.93145944,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7316433787345886},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4490616023540497},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.4055112600326538}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7316433787345886},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4490616023540497},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4055112600326538}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ssci.2016.7849368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci.2016.7849368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:openaire/a5ed743f-4597-4e0a-a2da-21fdba5b6a72","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/a5ed743f-4597-4e0a-a2da-21fdba5b6a72","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Pieters, M & Wiering, M A 2017, Q-learning with experience replay in a dynamic environment. in 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016., 7849368, 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016, Institute of Electrical and Electronics Engineers Inc., 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016, Athens, Greece, 06/12/2016. https://doi.org/10.1109/SSCI.2016.7849368","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.rug.nl:openaire/a5ed743f-4597-4e0a-a2da-21fdba5b6a72","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/a5ed743f-4597-4e0a-a2da-21fdba5b6a72","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Pieters, M & Wiering, M A 2017, Q-learning with experience replay in a dynamic environment. in 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016., 7849368, 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016, Institute of Electrical and Electronics Engineers Inc., 2016 IEEE Symposium Series on Computational Intelligence, SSCI 2016, Athens, Greece, 06/12/2016. https://doi.org/10.1109/SSCI.2016.7849368","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W14769209","https://openalex.org/W32403112","https://openalex.org/W1487219635","https://openalex.org/W1504212531","https://openalex.org/W1515851193","https://openalex.org/W1550736008","https://openalex.org/W1554233645","https://openalex.org/W1605577844","https://openalex.org/W1641379095","https://openalex.org/W1977561226","https://openalex.org/W1982928247","https://openalex.org/W2076150152","https://openalex.org/W2076337359","https://openalex.org/W2101786389","https://openalex.org/W2107941094","https://openalex.org/W2141559645","https://openalex.org/W2146598871","https://openalex.org/W2160308170","https://openalex.org/W2165131254","https://openalex.org/W2201581102","https://openalex.org/W2532691423","https://openalex.org/W2951573205","https://openalex.org/W2963477884","https://openalex.org/W3142468893","https://openalex.org/W4214807090","https://openalex.org/W6629027733","https://openalex.org/W6630225054","https://openalex.org/W6632929819","https://openalex.org/W6636236989","https://openalex.org/W7011085707"],"related_works":["https://openalex.org/W2355862304","https://openalex.org/W2356108042","https://openalex.org/W2030250808","https://openalex.org/W2376796979","https://openalex.org/W2379418341","https://openalex.org/W2380054981","https://openalex.org/W2393110101","https://openalex.org/W2379285345","https://openalex.org/W4239328682","https://openalex.org/W2372054075"],"abstract_inverted_index":{"Most":[0],"research":[1],"in":[2,107,118,158,184],"reinforcement":[3,88,137],"learning":[4,89,138],"has":[5],"focused":[6],"on":[7,48,78,82],"stationary":[8],"environments.":[9],"In":[10,97,175],"this":[11,62],"paper,":[12],"we":[13,90,121],"propose":[14],"several":[15],"adaptations":[16],"of":[17,32,35,57,70,94,125],"Q-learning":[18,169],"for":[19,23],"a":[20,33,44,51,55,104,114,143,171,185],"dynamic":[21],"environment,":[22],"both":[24,111,119,162],"single":[25],"and":[26,60,145,170,193,206],"multiple":[27],"agents.":[28,130],"The":[29,154,196],"environment":[30,161,178],"consists":[31],"grid":[34],"random":[36],"rewards,":[37],"where":[38,73],"every":[39],"reward":[40,126,182,194,199],"is":[41,149,187,201],"removed":[42],"after":[43],"visit.":[45],"We":[46,66],"focus":[47],"experience":[49,71,163],"replay,":[50,72],"technique":[52],"that":[53,157],"receives":[54],"lot":[56],"attention":[58],"nowadays,":[59],"combine":[61],"method":[63],"with":[64,203],"Q-learning.":[65],"compare":[67,91],"two":[68,92],"variations":[69,93,120],"experiences":[74],"are":[75],"reused":[76],"based":[77,81],"time":[79],"or":[80],"the":[83,98,101,108,123,129,147,150,159,176,179],"obtained":[84,202],"reward.":[85],"For":[86],"multi-agent":[87,136,177],"policy":[95],"representation.":[96],"first":[99],"variation":[100,110],"agents":[102,112],"share":[103],"Q-function,":[105],"while":[106],"second":[109],"have":[113],"separate":[115,204,207],"Q-function.":[116],"Furthermore,":[117],"test":[122],"effect":[124],"sharing":[127,142,146],"between":[128],"This":[131],"leads":[132],"to":[133],"four":[134],"different":[135],"algorithms,":[139],"from":[140],"which":[141],"Q-function":[144,192],"rewards":[148],"most":[151],"cooperative":[152],"method.":[153],"results":[155],"show":[156],"single-agent":[160],"replay":[164],"algorithms":[165],"significantly":[166],"outperform":[167],"standard":[168],"greedy":[172],"benchmark":[173],"agent.":[174],"highest":[180,197],"maximum":[181],"sum":[183,200],"trial":[186],"achieved":[188],"by":[189],"using":[190],"one":[191],"sharing.":[195],"mean":[198],"Q-functions":[205],"rewards.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":5}],"updated_date":"2026-01-28T23:14:49.684275","created_date":"2025-10-10T00:00:00"}