{"id":"https://openalex.org/W2109008048","doi":"https://doi.org/10.1109/iros.2004.1389841","title":"Stochastic policy gradient reinforcement learning on a simple 3D biped","display_name":"Stochastic policy gradient reinforcement learning on a simple 3D biped","publication_year":2005,"publication_date":"2005-04-01","ids":{"openalex":"https://openalex.org/W2109008048","doi":"https://doi.org/10.1109/iros.2004.1389841","mag":"2109008048"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2004.1389841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389841","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074291890","display_name":"Russ Tedrake","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]},{"id":"https://openalex.org/I4210159271","display_name":"MIT-Harvard Center for Ultracold Atoms","ror":"https://ror.org/053tmcn30","country_code":"US","type":"facility","lineage":["https://openalex.org/I136199984","https://openalex.org/I4210159271","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"R. Tedrake","raw_affiliation_strings":["Computer Science & Artificial Intelligence Lab, Center for Bits & Atoms, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science & Artificial Intelligence Lab, Center for Bits & Atoms, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159271","https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074781984","display_name":"Tianbao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210104429","display_name":"Institute of Cognitive and Brain Sciences","ror":"https://ror.org/01c3w3270","country_code":"US","type":"education","lineage":["https://openalex.org/I4210104429"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"T.W. Zhang","raw_affiliation_strings":["Department of Mechanical Engineering Brain & Cognitive Sciences, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering Brain & Cognitive Sciences, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210104429","https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075369363","display_name":"H. Sebastian Seung","orcid":null},"institutions":[{"id":"https://openalex.org/I1344073410","display_name":"Howard Hughes Medical Institute","ror":"https://ror.org/006w34k90","country_code":"US","type":"facility","lineage":["https://openalex.org/I1344073410"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H.S. Seung","raw_affiliation_strings":["Howard Hughes Medical Institute Brain & Cognitive Sciences Center for Bits & Atoms, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Howard Hughes Medical Institute Brain & Cognitive Sciences Center for Bits & Atoms, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I1344073410","https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5074291890"],"corresponding_institution_ids":["https://openalex.org/I4210159271","https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":15.0579,"has_fulltext":false,"cited_by_count":270,"citation_normalized_percentile":{"value":0.99390002,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"3","issue":null,"first_page":"2849","last_page":"2854"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11023","display_name":"Prosthetics and Rehabilitation Robotics","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7681680917739868},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6597417593002319},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6414161920547485},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4832342565059662},{"id":"https://openalex.org/keywords/degrees-of-freedom","display_name":"Degrees of freedom (physics and chemistry)","score":0.47843697667121887},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.43232855200767517},{"id":"https://openalex.org/keywords/terrain","display_name":"Terrain","score":0.4195058345794678},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.41585391759872437},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.2615240812301636}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7681680917739868},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6597417593002319},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6414161920547485},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4832342565059662},{"id":"https://openalex.org/C208081375","wikidata":"https://www.wikidata.org/wiki/Q274502","display_name":"Degrees of freedom (physics and chemistry)","level":2,"score":0.47843697667121887},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.43232855200767517},{"id":"https://openalex.org/C161840515","wikidata":"https://www.wikidata.org/wiki/Q186131","display_name":"Terrain","level":2,"score":0.4195058345794678},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.41585391759872437},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2615240812301636},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros.2004.1389841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389841","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.69.3173","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.3173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://hebb.mit.edu/people/seung/papers/iros04.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306079","display_name":"David and Lucile Packard Foundation","ror":"https://ror.org/032atxq54"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1521427077","https://openalex.org/W1686197137","https://openalex.org/W1994923984","https://openalex.org/W1996763811","https://openalex.org/W2041459584","https://openalex.org/W2111999018","https://openalex.org/W2119105564","https://openalex.org/W2119717200","https://openalex.org/W2127036885","https://openalex.org/W2139053308","https://openalex.org/W2144446635","https://openalex.org/W2155027007","https://openalex.org/W2163668399","https://openalex.org/W2569805627","https://openalex.org/W3103182070","https://openalex.org/W6676801769","https://openalex.org/W6683204974","https://openalex.org/W6731402942"],"related_works":["https://openalex.org/W1992962589","https://openalex.org/W3032871857","https://openalex.org/W4296209631","https://openalex.org/W4226458444","https://openalex.org/W3213331859","https://openalex.org/W4390637946","https://openalex.org/W2909067415","https://openalex.org/W4226082913","https://openalex.org/W1977095816","https://openalex.org/W2088811509"],"abstract_inverted_index":{"We":[0,78,119],"present":[1],"a":[2,13,23,38,62,68,84,121,138],"learning":[3,41,76,111,147],"system":[4,100,148],"which":[5,58],"is":[6,155],"able":[7,156],"to":[8,52,67,126,157,160],"quickly":[9,150],"and":[10,40,66,93,104,107,130],"reliably":[11],"acquire":[12],"robust":[14],"feedback":[15],"control":[16,99],"policy":[17,123],"for":[18],"3D":[19],"dynamic":[20,64],"walking":[21,36],"from":[22],"blank-slate":[24],"using":[25,137],"only":[26,87],"trials":[27],"implemented":[28],"on":[29,113],"our":[30,56],"physical":[31],"robot.":[32],"The":[33],"robot":[34,85,154],"begins":[35],"within":[37],"minute":[39],"converges":[42],"in":[43,71,101],"approximately":[44],"20":[45],"minutes.":[46],"This":[47,145],"success":[48],"can":[49],"be":[50],"attributed":[51],"the":[53,72,75,80,98,102,110,114,132,135,142,153,161],"mechanics":[54],"of":[55,74,91,134,141],"robot,":[57],"are":[59],"modeled":[60],"after":[61],"passive":[63],"walker,":[65],"dramatic":[69],"reduction":[70],"dimensionality":[73,81],"problem.":[77],"reduce":[79],"by":[82,96,108],"designing":[83],"with":[86],"6":[88],"internal":[89],"degrees":[90],"freedom":[92],"4":[94],"actuators,":[95],"decomposing":[97],"frontal":[103],"sagittal":[105],"planes,":[106],"formulating":[109],"problem":[112,129],"discrete":[115],"return":[116],"map":[117],"dynamics.":[118],"apply":[120],"stochastic":[122],"gradient":[124],"algorithm":[125],"this":[127],"reduced":[128],"decrease":[131],"variance":[133],"update":[136],"state-based":[139],"estimate":[140],"expected":[143],"cost.":[144],"optimized":[146],"works":[149],"enough":[151],"that":[152],"continually":[158],"adapt":[159],"terrain":[162],"as":[163],"it":[164],"walks.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":19},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":14},{"year":2015,"cited_by_count":15},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":14},{"year":2012,"cited_by_count":18}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
