{"id":"https://openalex.org/W2902541517","doi":"https://doi.org/10.1017/s0269888918000280","title":"Q-Table compression for reinforcement learning","display_name":"Q-Table compression for reinforcement learning","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2902541517","doi":"https://doi.org/10.1017/s0269888918000280","mag":"2902541517"},"language":"en","primary_location":{"id":"doi:10.1017/s0269888918000280","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888918000280","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028682495","display_name":"Leonardo Amado","orcid":"https://orcid.org/0000-0001-6119-4601"},"institutions":[{"id":"https://openalex.org/I45643870","display_name":"Pontif\u00edcia Universidade Cat\u00f3lica do Rio Grande do Sul","ror":"https://ror.org/025vmq686","country_code":"BR","type":"education","lineage":["https://openalex.org/I45643870"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Leonardo Amado","raw_affiliation_strings":["Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil; e-mail","Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil","e-mail"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil; e-mail","institution_ids":["https://openalex.org/I45643870"]},{"raw_affiliation_string":"Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil","institution_ids":["https://openalex.org/I45643870"]},{"raw_affiliation_string":"e-mail","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073632183","display_name":"Felipe Meneguzzi","orcid":"https://orcid.org/0000-0003-3549-6168"},"institutions":[{"id":"https://openalex.org/I45643870","display_name":"Pontif\u00edcia Universidade Cat\u00f3lica do Rio Grande do Sul","ror":"https://ror.org/025vmq686","country_code":"BR","type":"education","lineage":["https://openalex.org/I45643870"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Felipe Meneguzzi","raw_affiliation_strings":["Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil; e-mail","e-mail","Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil"],"raw_orcid":"https://orcid.org/0000-0003-3549-6168","affiliations":[{"raw_affiliation_string":"Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil; e-mail","institution_ids":["https://openalex.org/I45643870"]},{"raw_affiliation_string":"e-mail","institution_ids":[]},{"raw_affiliation_string":"Pontifical Catholic University of Rio Grande do Sul, Av. Ipiranga 6681, Porto Alegre, RS, 90619-900, Brazil","institution_ids":["https://openalex.org/I45643870"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5028682495"],"corresponding_institution_ids":["https://openalex.org/I45643870"],"apc_list":null,"apc_paid":null,"fwci":0.3385,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.70087914,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"33","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9015980958938599},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8115749955177307},{"id":"https://openalex.org/keywords/branching","display_name":"Branching (polymer chemistry)","score":0.5403459668159485},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5086008310317993},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48812994360923767},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.48120003938674927},{"id":"https://openalex.org/keywords/factor","display_name":"Factor (programming language)","score":0.4559209942817688},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.44379132986068726},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42764461040496826},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34051603078842163},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3344244956970215},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.11173990368843079},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09282717108726501}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9015980958938599},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8115749955177307},{"id":"https://openalex.org/C206175624","wikidata":"https://www.wikidata.org/wiki/Q595731","display_name":"Branching (polymer chemistry)","level":2,"score":0.5403459668159485},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5086008310317993},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48812994360923767},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.48120003938674927},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.4559209942817688},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.44379132986068726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42764461040496826},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34051603078842163},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3344244956970215},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.11173990368843079},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09282717108726501},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0269888918000280","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888918000280","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W6043852","https://openalex.org/W149149418","https://openalex.org/W267428601","https://openalex.org/W605348272","https://openalex.org/W1498436455","https://openalex.org/W1515851193","https://openalex.org/W1658008008","https://openalex.org/W1757796397","https://openalex.org/W1910131649","https://openalex.org/W1969489491","https://openalex.org/W2025768430","https://openalex.org/W2029514605","https://openalex.org/W2059148040","https://openalex.org/W2103626435","https://openalex.org/W2107726111","https://openalex.org/W2110906765","https://openalex.org/W2118318536","https://openalex.org/W2119567691","https://openalex.org/W2125074935","https://openalex.org/W2131600418","https://openalex.org/W2145339207","https://openalex.org/W2150823748","https://openalex.org/W2154997814","https://openalex.org/W2165698076","https://openalex.org/W2166798247","https://openalex.org/W2167489871","https://openalex.org/W2198041288","https://openalex.org/W2334782222","https://openalex.org/W2547416798","https://openalex.org/W2749807327","https://openalex.org/W2787259794","https://openalex.org/W2964056654","https://openalex.org/W4312531999","https://openalex.org/W6676007687","https://openalex.org/W6677834374"],"related_works":["https://openalex.org/W2462424100","https://openalex.org/W2148974013","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W3009457412","https://openalex.org/W2992629954"],"abstract_inverted_index":{"Abstract":[0],"Reinforcement":[1],"learning":[2],"(RL)":[3],"algorithms":[4,25],"are":[5,97],"often":[6],"used":[7],"to":[8,27,47,67,108],"compute":[9],"agents":[10,110],"capable":[11],"of":[12,19,51,65,78,84,105,124,127],"acting":[13],"in":[14,29,53,81,111],"environments":[15,30],"without":[16],"prior":[17],"knowledge":[18],"the":[20,49,69,76,82,106],"environment":[21],"dynamics.":[22],"However,":[23],"these":[24],"struggle":[26],"converge":[28],"with":[31],"large":[32,37,70],"branching":[33,71,95],"factors":[34],"and":[35,94,120],"their":[36],"resulting":[38],"state-spaces.":[39],"In":[40],"this":[41,131],"work,":[42],"we":[43],"develop":[44,62],"an":[45,103,112],"approach":[46],"compress":[48],"number":[50,123],"entries":[52],"a":[54,58,63,85,98,122],"Q-value":[55],"table":[56],"using":[57],"deep":[59],"auto-encoder.":[60],"We":[61,74,100],"set":[64],"techniques":[66,80],"mitigate":[68],"factor":[72,96],"problem.":[73,99,132],"present":[75],"application":[77],"such":[79],"scenario":[83,115],"real-time":[86],"strategy":[87],"(RTS)":[88],"game,":[89],"where":[90,116],"both":[91],"state":[92],"space":[93],"empirically":[101],"evaluate":[102],"implementation":[104],"technique":[107],"control":[109],"RTS":[113],"game":[114],"classical":[117],"RL":[118],"fails":[119],"provide":[121],"possible":[125],"avenues":[126],"further":[128],"work":[129],"on":[130]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-01-08T20:05:33.558190","created_date":"2025-10-10T00:00:00"}
