{"id":"https://openalex.org/W1983486975","doi":"https://doi.org/10.1080/09528130110063100","title":"Q-Learning: computation of optimal Q-values for evaluating the learning level in robotic tasks","display_name":"Q-Learning: computation of optimal Q-values for evaluating the learning level in robotic tasks","publication_year":2001,"publication_date":"2001-07-01","ids":{"openalex":"https://openalex.org/W1983486975","doi":"https://doi.org/10.1080/09528130110063100","mag":"1983486975"},"language":"en","primary_location":{"id":"doi:10.1080/09528130110063100","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09528130110063100","pdf_url":null,"source":{"id":"https://openalex.org/S153467142","display_name":"Journal of Experimental & Theoretical Artificial Intelligence","issn_l":"0952-813X","issn":["0952-813X","1362-3079"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Experimental &amp; Theoretical Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050637227","display_name":"Tiziana D\u2019Orazio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tiziana D'Orazio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047403740","display_name":"Grazia Cicirelli","orcid":"https://orcid.org/0000-0003-1562-0467"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grazia Cicirelli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.09448475,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"3","first_page":"241","last_page":"270"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.794011652469635},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.7695334553718567},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7410168051719666},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.6964093446731567},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6482739448547363},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.5346813201904297},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5263258814811707},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.4855630099773407},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4839956760406494},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4804205894470215},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4361266493797302},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3797150254249573},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3339604139328003},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.1859557032585144},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16802409291267395}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.794011652469635},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.7695334553718567},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7410168051719666},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.6964093446731567},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6482739448547363},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.5346813201904297},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5263258814811707},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.4855630099773407},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4839956760406494},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4804205894470215},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4361266493797302},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3797150254249573},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3339604139328003},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.1859557032585144},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16802409291267395},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/09528130110063100","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09528130110063100","pdf_url":null,"source":{"id":"https://openalex.org/S153467142","display_name":"Journal of Experimental & Theoretical Artificial Intelligence","issn_l":"0952-813X","issn":["0952-813X","1362-3079"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Experimental &amp; Theoretical Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1557517019","https://openalex.org/W1612579644","https://openalex.org/W1626977535","https://openalex.org/W1789155456","https://openalex.org/W1924069939","https://openalex.org/W1967835318","https://openalex.org/W1988071341","https://openalex.org/W2030153071","https://openalex.org/W2076552100","https://openalex.org/W2086436089","https://openalex.org/W2091565802","https://openalex.org/W2105519301","https://openalex.org/W2107726111","https://openalex.org/W2118001193","https://openalex.org/W2122701159","https://openalex.org/W2127290018","https://openalex.org/W2147750403","https://openalex.org/W2150147323","https://openalex.org/W2150339816","https://openalex.org/W2152166054","https://openalex.org/W2319588593","https://openalex.org/W2322148045","https://openalex.org/W3011120880"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W4376605461","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W2892507673","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915"],"abstract_inverted_index":{"Abstract":[0],"A":[1],"problem":[2],"related":[3],"to":[4,50,63,113],"the":[5,17,21,29,33,36,54,65,68,72,76,80,84,87,117,121,125],"use":[6],"of":[7,19,46,75,124],"reinforcement":[8],"learning":[9,22,133],"(RL)":[10],"algorithms":[11],"on":[12],"real":[13],"robot":[14],"applications":[15],"is":[16,35,49,61],"difficulty":[18],"measuring":[20],"level":[23],"reached":[24],"after":[25],"some":[26],"experience.":[27],"Among":[28],"different":[30,132],"RL":[31],"algorithms,":[32],"Q-learning":[34,81,126],"most":[37],"widely":[38],"used":[39],"in":[40,115],"accomplishing":[41],"robotic":[42],"tasks.":[43],"The":[44],"aim":[45],"this":[47],"work":[48],"a":[51],"priori":[52],"evaluate":[53],"optimal":[55,91],"Q-values":[56],"for":[57,86,90],"problems":[58],"where":[59],"it":[60],"possible":[62],"compute":[64],"distance":[66],"between":[67],"current":[69],"state":[70,74],"and":[71,92,100,104],"goal":[73],"system.":[77],"Starting":[78],"from":[79],"updating":[82],"formula":[83],"equations":[85],"maximum":[88],"Q-weights,":[89],"non-optimal":[93],"actions,":[94],"have":[95,109,128],"been":[96,110,129],"computed":[97],"considering":[98],"delayed":[99],"immediate":[101],"rewards.":[102],"Deterministic":[103],"non":[105],"deterministic":[106],"grid-world":[107],"environments":[108],"also":[111],"considered":[112],"test":[114],"simulations":[116],"obtained":[118],"equations.":[119],"Besides":[120],"convergence":[122],"rates":[123],"algorithm":[127],"compared":[130],"using":[131],"rate":[134],"parameters.":[135],"Keywords:":[136],"Q-LEARNING":[137],"Optimal":[138],"Q-VALUES":[139],"Learning":[140],"Parameters":[141],"Convergence":[142],"Rate":[143]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
