{"id":"https://openalex.org/W2775408020","doi":"https://doi.org/10.1109/icacci.2017.8125811","title":"Comparison of reinforcement learning algorithms applied to the cart-pole problem","display_name":"Comparison of reinforcement learning algorithms applied to the cart-pole problem","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2775408020","doi":"https://doi.org/10.1109/icacci.2017.8125811","mag":"2775408020"},"language":"en","primary_location":{"id":"doi:10.1109/icacci.2017.8125811","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icacci.2017.8125811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Advances in Computing, Communications and Informatics (ICACCI)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1810.01940","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037108669","display_name":"S. Nagendra","orcid":"https://orcid.org/0000-0003-4350-0307"},"institutions":[{"id":"https://openalex.org/I196608512","display_name":"PES University","ror":"https://ror.org/05m169e78","country_code":"IN","type":"education","lineage":["https://openalex.org/I196608512"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Savinay Nagendra","raw_affiliation_strings":["Dept. Elect. Electro. Engg., PES Inst. of Tech., Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. Elect. Electro. Engg., PES Inst. of Tech., Bangalore, India","institution_ids":["https://openalex.org/I196608512"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087436359","display_name":"Nikhil Podila","orcid":null},"institutions":[{"id":"https://openalex.org/I196608512","display_name":"PES University","ror":"https://ror.org/05m169e78","country_code":"IN","type":"education","lineage":["https://openalex.org/I196608512"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Nikhil Podila","raw_affiliation_strings":["Dept. Elect. Electro. Engg., PES Inst. of Tech., Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. Elect. Electro. Engg., PES Inst. of Tech., Bangalore, India","institution_ids":["https://openalex.org/I196608512"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008718177","display_name":"Rashmi Ugarakhod","orcid":null},"institutions":[{"id":"https://openalex.org/I196608512","display_name":"PES University","ror":"https://ror.org/05m169e78","country_code":"IN","type":"education","lineage":["https://openalex.org/I196608512"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rashmi Ugarakhod","raw_affiliation_strings":["Dept. Electro. Commun. Engg., PES University, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. Electro. Commun. Engg., PES University, Bangalore, India","institution_ids":["https://openalex.org/I196608512"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011337165","display_name":"Koshy George","orcid":"https://orcid.org/0000-0002-9818-7031"},"institutions":[{"id":"https://openalex.org/I196608512","display_name":"PES University","ror":"https://ror.org/05m169e78","country_code":"IN","type":"education","lineage":["https://openalex.org/I196608512"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Koshy George","raw_affiliation_strings":["Dept. Electro. Commun. Engg., PES University, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Dept. Electro. Commun. Engg., PES University, Bangalore, India","institution_ids":["https://openalex.org/I196608512"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5037108669"],"corresponding_institution_ids":["https://openalex.org/I196608512"],"apc_list":null,"apc_paid":null,"fwci":1.6629,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.8854383,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"26","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9321584701538086},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7757974863052368},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6843860149383545},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.6692922115325928},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6621150970458984},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6347149014472961},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.591549277305603},{"id":"https://openalex.org/keywords/swing","display_name":"Swing","score":0.5469026565551758},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.504909336566925},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.4578586518764496},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44765743613243103},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4364451766014099},{"id":"https://openalex.org/keywords/dynamical-systems-theory","display_name":"Dynamical systems theory","score":0.4244956374168396},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4128817319869995},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.33427804708480835},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32808738946914673},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.32384416460990906},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2724296748638153},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20931383967399597},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09971818327903748}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9321584701538086},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7757974863052368},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6843860149383545},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.6692922115325928},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6621150970458984},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6347149014472961},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.591549277305603},{"id":"https://openalex.org/C65655974","wikidata":"https://www.wikidata.org/wiki/Q14867674","display_name":"Swing","level":2,"score":0.5469026565551758},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.504909336566925},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.4578586518764496},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44765743613243103},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4364451766014099},{"id":"https://openalex.org/C79379906","wikidata":"https://www.wikidata.org/wiki/Q3174497","display_name":"Dynamical systems theory","level":2,"score":0.4244956374168396},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4128817319869995},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.33427804708480835},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32808738946914673},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32384416460990906},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2724296748638153},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20931383967399597},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09971818327903748},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icacci.2017.8125811","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icacci.2017.8125811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Advances in Computing, Communications and Informatics (ICACCI)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1810.01940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1810.01940","pdf_url":"https://arxiv.org/pdf/1810.01940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1810.01940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1810.01940","pdf_url":"https://arxiv.org/pdf/1810.01940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W46130386","https://openalex.org/W51114640","https://openalex.org/W166862392","https://openalex.org/W1569296262","https://openalex.org/W1599353299","https://openalex.org/W2082374298","https://openalex.org/W2084424121","https://openalex.org/W2091565802","https://openalex.org/W2102295697","https://openalex.org/W2108682071","https://openalex.org/W2121863487","https://openalex.org/W2160989584","https://openalex.org/W2171369025","https://openalex.org/W2539083524","https://openalex.org/W2550491676","https://openalex.org/W3022436500","https://openalex.org/W4214717370","https://openalex.org/W6601865881","https://openalex.org/W6676023451","https://openalex.org/W6685191362"],"related_works":["https://openalex.org/W2955790965","https://openalex.org/W3105579180","https://openalex.org/W2149418961","https://openalex.org/W4289355352","https://openalex.org/W4308702637","https://openalex.org/W2808418668","https://openalex.org/W4256087190","https://openalex.org/W4240668504","https://openalex.org/W4399155503","https://openalex.org/W2011233848"],"abstract_inverted_index":{"Designing":[0],"optimal":[1,37],"controllers":[2],"continues":[3],"to":[4,26],"be":[5],"challenging":[6],"as":[7,67],"systems":[8],"are":[9,13,74],"becoming":[10],"complex":[11],"and":[12,34,71,95],"inherently":[14],"nonlinear.":[15],"The":[16],"principal":[17],"advantage":[18],"of":[19,49,51,61],"reinforcement":[20],"learning":[21],"(RL)":[22],"is":[23,44],"its":[24],"ability":[25],"learn":[27],"from":[28],"the":[29,32,47,52,62,80],"interaction":[30],"with":[31,57,79],"environment":[33],"provide":[35],"an":[36],"control":[38,50],"strategy.":[39],"In":[40],"this":[41,77],"paper,":[42],"RL":[43,64,94],"explored":[45],"in":[46,76],"context":[48,78],"benchmark":[53],"cart-pole":[54],"dynamical":[55],"system":[56],"no":[58],"prior":[59],"knowledge":[60],"dynamics.":[63],"algorithms":[65],"such":[66],"temporal-difference,":[68],"policy-gradient":[69],"actorcritic,":[70],"value-function":[72],"approximation":[73],"compared":[75],"standard":[81],"linear":[82],"quadratic":[83],"regulator":[84],"solution.":[85],"Further,":[86],"we":[87],"propose":[88],"a":[89],"novel":[90],"approach":[91],"for":[92],"integrating":[93],"swing-up":[96],"controllers.":[97]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
