{"id":"https://openalex.org/W1567397728","doi":"https://doi.org/10.1109/icsmc.2005.1571637","title":"Neural Reinforcement Learning to Swing-up and Balance a Real Pole","display_name":"Neural Reinforcement Learning to Swing-up and Balance a Real Pole","publication_year":2006,"publication_date":"2006-01-18","ids":{"openalex":"https://openalex.org/W1567397728","doi":"https://doi.org/10.1109/icsmc.2005.1571637","mag":"1567397728"},"language":"en","primary_location":{"id":"doi:10.1109/icsmc.2005.1571637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2005.1571637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2005 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041323275","display_name":"Martin Riedmiller","orcid":"https://orcid.org/0000-0002-8465-5690"},"institutions":[{"id":"https://openalex.org/I170658231","display_name":"Osnabr\u00fcck University","ror":"https://ror.org/04qmmjx98","country_code":"DE","type":"education","lineage":["https://openalex.org/I170658231"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"M. Riedmiller","raw_affiliation_strings":["Neuroinformatics Group, University of Osnabr\u00fcck, Osnabruck, Germany","Neuroinformatics Group, Osnabrueck Univ., Germany"],"affiliations":[{"raw_affiliation_string":"Neuroinformatics Group, University of Osnabr\u00fcck, Osnabruck, Germany","institution_ids":["https://openalex.org/I170658231"]},{"raw_affiliation_string":"Neuroinformatics Group, Osnabrueck Univ., Germany","institution_ids":["https://openalex.org/I170658231"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5041323275"],"corresponding_institution_ids":["https://openalex.org/I170658231"],"apc_list":null,"apc_paid":null,"fwci":2.3286,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.88773977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"4","issue":null,"first_page":"3191","last_page":"3196"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8510464429855347},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.7702857851982117},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6911277770996094},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6799873113632202},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.574264645576477},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5054504871368408},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5048940777778625},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.4712519943714142},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4706377685070038},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.46707987785339355},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.4459969997406006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39929434657096863},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.37580356001853943},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.17559993267059326}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8510464429855347},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.7702857851982117},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6911277770996094},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6799873113632202},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.574264645576477},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5054504871368408},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5048940777778625},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.4712519943714142},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4706377685070038},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.46707987785339355},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.4459969997406006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39929434657096863},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.37580356001853943},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.17559993267059326},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icsmc.2005.1571637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2005.1571637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2005 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1547105496","https://openalex.org/W2010798644","https://openalex.org/W2103626435","https://openalex.org/W2120346334","https://openalex.org/W2125074935","https://openalex.org/W2141559645","https://openalex.org/W2143908786","https://openalex.org/W2586680856","https://openalex.org/W2911283634","https://openalex.org/W2914656440","https://openalex.org/W4285719527","https://openalex.org/W6677737365","https://openalex.org/W6678545291"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2066741154","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2059109728","https://openalex.org/W2082556335","https://openalex.org/W4281658507","https://openalex.org/W4224998860","https://openalex.org/W2598699656"],"abstract_inverted_index":{"This":[0,23,85],"paper":[1],"proposes":[2],"a":[3,18,34,38,88,93,113,126],"neural":[4,79,94],"network":[5],"based":[6],"reinforcement":[7,27],"learning":[8,28,90],"controller":[9,55],"that":[10],"is":[11,44,99],"able":[12],"to":[13,25,30,53,101,124],"learn":[14,102,125],"control":[15],"policies":[16],"in":[17,81,87],"highly":[19,104,127],"data":[20],"efficient":[21],"manner.":[22],"allows":[24],"apply":[26],"directly":[29],"real":[31,64,114,120],"plants":[32],"-neither":[33],"transition":[35,57,69],"model":[36,40],"nor":[37],"simulation":[39],"of":[41,92,109,119],"the":[42,54,63,78,103],"plant":[43],"needed":[45,123],"for":[46,76],"training.":[47],"The":[48,97,117],"only":[49],"training":[50,83],"information":[51],"provided":[52],"are":[56],"experiences":[58,70],"collected":[59],"from":[60,130],"interactions":[61],"with":[62],"plant.":[65],"By":[66],"storing":[67],"these":[68],"explicitly,":[71],"they":[72],"can":[73],"be":[74],"reconsidered":[75],"updating":[77],"Q-function":[80],"every":[82],"step.":[84],"results":[86],"stable":[89],"process":[91],"Q-value":[95],"function.":[96],"algorithm":[98],"applied":[100],"nonlinear":[105],"and":[106,111],"noisy":[107],"task":[108],"swinging-up":[110],"balancing":[112],"inverted":[115],"pendulum.":[116],"amount":[118],"time":[121],"interaction":[122],"effective":[128],"policy":[129],"scratch":[131],"was":[132],"less":[133],"than":[134],"14":[135],"minutes.":[136]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
