{"id":"https://openalex.org/W2154549708","doi":"https://doi.org/10.1177/105971239700600201","title":"Experiments with Reinforcement Learning in Problems with Continuous State and Action Spaces","display_name":"Experiments with Reinforcement Learning in Problems with Continuous State and Action Spaces","publication_year":1997,"publication_date":"1997-09-01","ids":{"openalex":"https://openalex.org/W2154549708","doi":"https://doi.org/10.1177/105971239700600201","mag":"2154549708"},"language":"en","primary_location":{"id":"doi:10.1177/105971239700600201","is_oa":false,"landing_page_url":"https://doi.org/10.1177/105971239700600201","pdf_url":null,"source":{"id":"https://openalex.org/S183337005","display_name":"Adaptive Behavior","issn_l":"1059-7123","issn":["1059-7123","1741-2633"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adaptive Behavior","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://repository.gatech.edu/bitstreams/0e62a701-9109-4bec-b355-c21338ae5a65/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109081206","display_name":"Juan Carlos Santamaria","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Juan C. Santamaria","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004923102","display_name":"Richard S. Sutton","orcid":"https://orcid.org/0000-0002-3679-3415"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richard S. Sutton","raw_affiliation_strings":["University of Massachusetts"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044452407","display_name":"Ashwin Ram","orcid":"https://orcid.org/0000-0003-1430-8770"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashwin Ram","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109081206"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":12.0764,"has_fulltext":true,"cited_by_count":258,"citation_normalized_percentile":{"value":0.98574586,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"6","issue":"2","first_page":"163","last_page":"217"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8623331785202026},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5849101543426514},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5701417922973633},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5379348993301392},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48714181780815125},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.47840556502342224},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.47662174701690674},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4629472494125366},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32260456681251526},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29027286171913147},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.10775354504585266}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8623331785202026},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5849101543426514},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5701417922973633},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5379348993301392},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48714181780815125},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.47840556502342224},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.47662174701690674},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4629472494125366},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32260456681251526},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29027286171913147},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.10775354504585266},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1177/105971239700600201","is_oa":false,"landing_page_url":"https://doi.org/10.1177/105971239700600201","pdf_url":null,"source":{"id":"https://openalex.org/S183337005","display_name":"Adaptive Behavior","issn_l":"1059-7123","issn":["1059-7123","1741-2633"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adaptive Behavior","raw_type":"journal-article"},{"id":"pmh:oai:smartech.gatech.edu:1853/21747","is_oa":true,"landing_page_url":"http://hdl.handle.net/1853/21747","pdf_url":"http://repository.gatech.edu/bitstreams/0e62a701-9109-4bec-b355-c21338ae5a65/download","source":{"id":"https://openalex.org/S4377196313","display_name":"SMARTech Repository (Georgia Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130701444","host_organization_name":"Georgia Institute of Technology","host_organization_lineage":["https://openalex.org/I130701444"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.27.4603","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.27.4603","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cc.gatech.edu/aimosaic/robot-lab/online-publications/tech-report.ps.gz","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.48.8280","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8280","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ftp://ftp.cs.umass.edu/pub/techrept/techreport/1996/UM-CS-1996-088.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.53.1018","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.53.1018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ftp://ftp.cs.umass.edu/pub/anw/pub/sutton/SSR-98.ps.Z","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.63.1168","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.63.1168","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-anw.cs.umass.edu/~rich/papers/SSR-98.pdf","raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:smartech.gatech.edu:1853/21747","is_oa":true,"landing_page_url":"http://hdl.handle.net/1853/21747","pdf_url":"http://repository.gatech.edu/bitstreams/0e62a701-9109-4bec-b355-c21338ae5a65/download","source":{"id":"https://openalex.org/S4377196313","display_name":"SMARTech Repository (Georgia Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130701444","host_organization_name":"Georgia Institute of Technology","host_organization_lineage":["https://openalex.org/I130701444"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2154549708.pdf","grobid_xml":"https://content.openalex.org/works/W2154549708.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W3185548","https://openalex.org/W582361224","https://openalex.org/W1525999779","https://openalex.org/W1572161815","https://openalex.org/W1588388339","https://openalex.org/W1706571876","https://openalex.org/W1966195676","https://openalex.org/W1969420452","https://openalex.org/W1981236334","https://openalex.org/W1981276685","https://openalex.org/W1986241547","https://openalex.org/W1993740947","https://openalex.org/W1995657560","https://openalex.org/W2037958061","https://openalex.org/W2042242286","https://openalex.org/W2091565802","https://openalex.org/W2098432798","https://openalex.org/W2100677568","https://openalex.org/W2104122126","https://openalex.org/W2113913482","https://openalex.org/W2114987061","https://openalex.org/W2124175081","https://openalex.org/W2139418546","https://openalex.org/W2172246523","https://openalex.org/W2182044576","https://openalex.org/W2341171179","https://openalex.org/W2473732573","https://openalex.org/W2540877599","https://openalex.org/W2808421695","https://openalex.org/W3011120880","https://openalex.org/W3041202696","https://openalex.org/W4212774754","https://openalex.org/W4231226883"],"related_works":["https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W3096874164","https://openalex.org/W4376605461","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3099153698","https://openalex.org/W3087814763","https://openalex.org/W2892507673","https://openalex.org/W2025663273"],"abstract_inverted_index":{"A":[0,48],"key":[1],"element":[2],"in":[3,51,170,192,242,255],"the":[4,11,22,66,87,105,113,141,156,196,203,209,233,246,256,264],"solution":[5,111],"of":[6,16,27,75,89,112,143,158,190,195,208,229,245],"reinforcement":[7,52,127],"learning":[8,53,128,206],"problems":[9],"is":[10,19,33,64,116],"value":[12,26,67,88,106,142,166,234],"function.":[13],"The":[14,31],"purpose":[15],"this":[17,40,97,120],"function":[18,32,68,99,107,137,162,185,224,235],"to":[20,42,45,56,85,103,164,222,262],"measure":[21,41],"long-term":[23],"utility":[24],"or":[25],"any":[28],"given":[29],"state.":[30],"important":[34,243],"because":[35],"an":[36],"agent":[37],"can":[38,133,219,236],"use":[39],"decide":[43],"what":[44],"do":[46],"next.":[47],"common":[49],"problem":[50],"when":[54,108],"applied":[55],"systems":[57,261],"having":[58,187],"continuous":[59],"states":[60],"and":[61,93,149,168,179,198,205,215,248,258],"action":[62,94,150,199,249],"spaces":[63,200],"that":[65,80,131,139,218,232],"must":[69],"operate":[70],"with":[71,136,226,239],"a":[72,109,124,213],"domain":[73],"consisting":[74],"real-valued":[76],"variables,":[77],"which":[78],"means":[79],"it":[81,132],"should":[82],"be":[83,134,220,237],"able":[84],"represent":[86,104,165],"infinitely":[90],"many":[91],"state":[92,148,197,247],"pairs.":[95],"For":[96],"reason,":[98],"approximators":[100,138,163,186,225],"are":[101],"used":[102,135,221],"close-form":[110],"optimal":[114],"policy":[115],"not":[117],"available.":[118],"In":[119,152],"article,":[121],"we":[122,154,182],"extend":[123],"previously":[125],"proposed":[126,265],"algorithm":[129],"so":[130,231],"generalize":[140],"individual":[144],"experiences":[145],"across":[146],"both":[147],"spaces.":[151,250],"particular,":[153],"discuss":[155,183],"benefits":[157],"using":[159],"sparse":[160],"coarse-coded":[161],"functions":[167],"describe":[169],"detail":[171],"three":[172],"implementations:":[173],"cerebellar":[174],"model":[175],"articulation":[176],"controllers,":[177],"instance-based,":[178],"case-based.":[180],"Additionally,":[181],"how":[184],"different":[188,193],"degrees":[189,228],"resolution":[191,230],"regions":[194,244],"may":[201],"influence":[202],"performance":[204],"efficiency":[207],"agent.":[210],"We":[211,251],"propose":[212],"simple":[214],"modular":[216],"technique":[217],"implement":[223],"nonuniform":[227],"represented":[238],"higher":[240],"accuracy":[241],"performed":[252],"extensive":[253],"experiments":[254],"double-integrator":[257],"pendulum":[259],"swing-up":[260],"demonstrate":[263],"ideas.":[266],"'":[267]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":10},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":10}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
