{"id":"https://openalex.org/W2982380123","doi":"https://doi.org/10.1109/icstcc.2019.8885715","title":"A Comparative Performance Study of Reinforcement Learning Algorithms for a Continuous Space Problem","display_name":"A Comparative Performance Study of Reinforcement Learning Algorithms for a Continuous Space Problem","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W2982380123","doi":"https://doi.org/10.1109/icstcc.2019.8885715","mag":"2982380123"},"language":"en","primary_location":{"id":"doi:10.1109/icstcc.2019.8885715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icstcc.2019.8885715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 23rd International Conference on System Theory, Control and Computing (ICSTCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082970308","display_name":"Andreea-Iulia Patachi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108695","display_name":"Gheorghe Asachi Technical University of Ia\u0219i","ror":"https://ror.org/014zxnz40","country_code":"RO","type":"education","lineage":["https://openalex.org/I4210108695"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Andreea-Iulia Patachi","raw_affiliation_strings":["Department of Computer Science and Engineering, Gheorghe Asachi Technical University of Iasi, Iasi, Romania","Department of Computer Science and Engineering, \u201cGheorghe Asachi\u201d Technical University of Ia\u015fi, Ia\u015fi, Romania","Department of Computer Science and Engineering, \"Gheorghe Asachi\" Technical University of Ia\u015fi, Ia\u015fi, Romania"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Gheorghe Asachi Technical University of Iasi, Iasi, Romania","institution_ids":["https://openalex.org/I4210108695"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, \u201cGheorghe Asachi\u201d Technical University of Ia\u015fi, Ia\u015fi, Romania","institution_ids":["https://openalex.org/I4210108695"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, \"Gheorghe Asachi\" Technical University of Ia\u015fi, Ia\u015fi, Romania","institution_ids":["https://openalex.org/I4210108695"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063576338","display_name":"Florin Leon","orcid":"https://orcid.org/0000-0002-1370-9145"},"institutions":[{"id":"https://openalex.org/I4210108695","display_name":"Gheorghe Asachi Technical University of Ia\u0219i","ror":"https://ror.org/014zxnz40","country_code":"RO","type":"education","lineage":["https://openalex.org/I4210108695"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Florin Leon","raw_affiliation_strings":["Department of Computer Science and Engineering, Gheorghe Asachi Technical University of Iasi, Iasi, Romania","Department of Computer Science and Engineering, \u201cGheorghe Asachi\u201d Technical University of Ia\u015fi, Ia\u015fi, Romania","Department of Computer Science and Engineering, \"Gheorghe Asachi\" Technical University of Ia\u015fi, Ia\u015fi, Romania"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Gheorghe Asachi Technical University of Iasi, Iasi, Romania","institution_ids":["https://openalex.org/I4210108695"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, \u201cGheorghe Asachi\u201d Technical University of Ia\u015fi, Ia\u015fi, Romania","institution_ids":["https://openalex.org/I4210108695"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, \"Gheorghe Asachi\" Technical University of Ia\u015fi, Ia\u015fi, Romania","institution_ids":["https://openalex.org/I4210108695"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5082970308"],"corresponding_institution_ids":["https://openalex.org/I4210108695"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.13141151,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"8","issue":null,"first_page":"860","last_page":"865"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8358985185623169},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7298884391784668},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6529709100723267},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6514490842819214},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6481325626373291},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6084779500961304},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5293769836425781},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.5133997201919556},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.47713541984558105},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4611437916755676},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.392752081155777},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1079263985157013}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8358985185623169},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7298884391784668},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6529709100723267},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6514490842819214},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6481325626373291},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6084779500961304},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5293769836425781},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5133997201919556},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.47713541984558105},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4611437916755676},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.392752081155777},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1079263985157013},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icstcc.2019.8885715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icstcc.2019.8885715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 23rd International Conference on System Theory, Control and Computing (ICSTCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W368409423","https://openalex.org/W1538131130","https://openalex.org/W1600437712","https://openalex.org/W2046376809","https://openalex.org/W2113913482","https://openalex.org/W2144366468","https://openalex.org/W2145339207","https://openalex.org/W2156737235","https://openalex.org/W2158150115","https://openalex.org/W2257979135","https://openalex.org/W2583528914","https://openalex.org/W2736601468","https://openalex.org/W2738669288","https://openalex.org/W2950395671","https://openalex.org/W2964082094","https://openalex.org/W4214717370","https://openalex.org/W4293396018","https://openalex.org/W6632100814","https://openalex.org/W6682972110","https://openalex.org/W6683195989","https://openalex.org/W6732951832","https://openalex.org/W6735579001","https://openalex.org/W6741002519"],"related_works":["https://openalex.org/W1585007175","https://openalex.org/W2382521049","https://openalex.org/W4306904969","https://openalex.org/W4377865163","https://openalex.org/W3193857078","https://openalex.org/W2888956734","https://openalex.org/W3000197790","https://openalex.org/W4315865067","https://openalex.org/W2979433843","https://openalex.org/W3208304128"],"abstract_inverted_index":{"Deep":[0,60,63],"learning":[1],"algorithms":[2],"have":[3],"made":[4],"significant":[5],"progress":[6],"in":[7],"recent":[8],"years":[9],"due":[10],"to":[11,37,52,81,87],"the":[12,35,53,58,77,92,100,114],"power":[13],"of":[14,32],"deep":[15],"neural":[16,107],"networks,":[17],"beyond":[18],"human":[19],"performance":[20,47],"levels.":[21],"Unfortunately,":[22],"there":[23],"are":[24,95],"many":[25],"challenges":[26],"that":[27,85,105],"require":[28],"a":[29,46,71,106],"large":[30],"number":[31],"interactions":[33],"with":[34,110],"environment":[36],"learn":[38,80],"and":[39,62],"generalize":[40],"hidden":[41],"states.":[42],"This":[43],"paper":[44],"presents":[45],"comparison":[48],"between":[49],"different":[50],"solutions":[51],"Mountain":[54,67],"Car":[55,68],"problem":[56,75],"using":[57],"Q-Learning,":[59],"Q-Learning":[61],"Q-Networks":[64],"algorithms.":[65],"The":[66,102],"model":[69],"is":[70,113],"simple":[72],"but":[73,94],"intriguing":[74],"since":[76],"agent":[78],"must":[79],"take":[82],"initial":[83],"actions":[84],"seem":[86],"prevent":[88],"it":[89],"from":[90],"reaching":[91],"goal,":[93],"nevertheless":[96],"mandatory":[97],"for":[98,118],"solving":[99],"problem.":[101],"results":[103],"show":[104],"network":[108],"implementation":[109],"experience":[111],"replay":[112],"most":[115],"effective":[116],"method":[117],"this":[119],"task.":[120]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
