{"id":"https://openalex.org/W2134140787","doi":"https://doi.org/10.1109/adprl.2009.4927532","title":"The QV family compared to other reinforcement learning algorithms","display_name":"The QV family compared to other reinforcement learning algorithms","publication_year":2009,"publication_date":"2009-03-01","ids":{"openalex":"https://openalex.org/W2134140787","doi":"https://doi.org/10.1109/adprl.2009.4927532","mag":"2134140787"},"language":"en","primary_location":{"id":"doi:10.1109/adprl.2009.4927532","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2009.4927532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060596453","display_name":"Marco Wiering","orcid":"https://orcid.org/0000-0003-4331-7537"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Marco A. Wiering","raw_affiliation_strings":["Department of Artificial Intelligence, University of Groningam, Netherlands","Department of Artificial Intelligence of the University of Groningen, Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, University of Groningam, Netherlands","institution_ids":[]},{"raw_affiliation_string":"Department of Artificial Intelligence of the University of Groningen, Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033135596","display_name":"Hado van Hasselt","orcid":null},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]},{"id":"https://openalex.org/I173063890","display_name":"University of Applied Sciences Utrecht","ror":"https://ror.org/028z9kw20","country_code":"NL","type":"education","lineage":["https://openalex.org/I173063890"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Hado van Hasselt","raw_affiliation_strings":["Intelligent Systems Group of Utrecht University, Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Systems Group of Utrecht University, Netherlands","institution_ids":["https://openalex.org/I173063890","https://openalex.org/I193662353"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5060596453"],"corresponding_institution_ids":["https://openalex.org/I169381384"],"apc_list":null,"apc_paid":null,"fwci":3.6142,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.93310254,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"8","issue":null,"first_page":"101","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9351638555526733},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.706317663192749},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5956434607505798},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5639122724533081},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.5464524030685425},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5005369186401367},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4101516604423523}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9351638555526733},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.706317663192749},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5956434607505798},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5639122724533081},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.5464524030685425},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5005369186401367},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4101516604423523}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/adprl.2009.4927532","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2009.4927532","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:openaire_cris_publications/f46669c4-8de6-4efb-ad92-e262c8d1732c","is_oa":false,"landing_page_url":"https://hdl.handle.net/11370/f46669c4-8de6-4efb-ad92-e262c8d1732c","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wiering, M A & van Hasselt, H 2009, The QV Family Compared to Other Reinforcement Learning Algorithms. in ADPRL: 2009 IEEE SYMPOSIUM ON ADAPTIVE DYNAMIC PROGRAMMING AND REINFORCEMENT LEARNING. IEEE (The Institute of Electrical and Electronics Engineers), NEW YORK, pp. 101-108, IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning, Tunisia, 30/03/2009.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:rug:oai:pure.rug.nl:publications/f46669c4-8de6-4efb-ad92-e262c8d1732c","is_oa":false,"landing_page_url":"https://research.rug.nl/en/publications/f46669c4-8de6-4efb-ad92-e262c8d1732c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ADPRL: 2009 IEEE SYMPOSIUM ON ADAPTIVE DYNAMIC PROGRAMMING AND REINFORCEMENT LEARNING, 101 - 108","raw_type":"info:eu-repo/semantics/conferencepaper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W166862392","https://openalex.org/W1546167776","https://openalex.org/W1547105496","https://openalex.org/W1549353711","https://openalex.org/W1605318140","https://openalex.org/W1824921430","https://openalex.org/W2043806097","https://openalex.org/W2048226872","https://openalex.org/W2064018461","https://openalex.org/W2100677568","https://openalex.org/W2107726111","https://openalex.org/W2121863487","https://openalex.org/W2124175081","https://openalex.org/W2125074935","https://openalex.org/W2144446635","https://openalex.org/W2147750403","https://openalex.org/W2150339816","https://openalex.org/W2152445738","https://openalex.org/W2155027007","https://openalex.org/W2165131254","https://openalex.org/W3011120880","https://openalex.org/W3041202696","https://openalex.org/W3103182070","https://openalex.org/W4214717370","https://openalex.org/W4233696721","https://openalex.org/W4245108548","https://openalex.org/W6678114464","https://openalex.org/W6678545291","https://openalex.org/W6683204974"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W1966456942","https://openalex.org/W2166117066","https://openalex.org/W3148138296","https://openalex.org/W4294873804","https://openalex.org/W3127551068","https://openalex.org/W2808546214","https://openalex.org/W2152445738","https://openalex.org/W225045806"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"several":[3],"new":[4,14,46],"online":[5],"model-free":[6],"reinforcement":[7,15],"learning":[8],"(RL)":[9],"algorithms.":[10],"We":[11,50,71],"designed":[12],"three":[13],"algorithms,":[16,62,108],"namely:":[17,63],"QV2,":[18],"QVMAX,":[19],"and":[20,36,42,69,109],"QVMAX2,":[21],"that":[22,95,110,118],"are":[23,38],"all":[24],"based":[25],"on":[26,74,86,123],"the":[27,87,106],"QV-learning":[28,125],"algorithm,":[29],"but":[30],"in":[31],"contrary":[32],"to":[33,55],"QV-learning,":[34,68],"QVMAX":[35],"QVMAX2":[37],"off-policy":[39],"RL":[40,48,61,116],"algorithms":[41,54],"QV2":[43],"is":[44,112],"a":[45,56,114],"on-policy":[47],"algorithm.":[49],"experimentally":[51],"compare":[52],"these":[53],"large":[57,102],"number":[58],"of":[59,78],"different":[60,97,107],"Q-learning,":[64],"Sarsa,":[65],"R-learning,":[66],"Actor-Critic,":[67],"ACLA.":[70],"show":[72,83,94],"experiments":[73],"five":[75],"maze":[76],"problems":[77],"varying":[79],"complexity.":[80],"Furthermore,":[81],"we":[82],"experimental":[84],"results":[85,93],"cart":[88],"pole":[89],"balancing":[90],"problem.":[91],"The":[92],"for":[96],"problems,":[98],"there":[99,111],"can":[100],"be":[101],"performance":[103],"differences":[104],"between":[105],"not":[113],"single":[115],"algorithm":[117],"always":[119],"performs":[120],"best,":[121],"although":[122],"average":[124],"scores":[126],"highest.":[127]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}