{"id":"https://openalex.org/W2784192398","doi":"https://doi.org/10.1109/humanoids.2017.8246900","title":"Emergence of human-comparable balancing behaviours by deep reinforcement learning","display_name":"Emergence of human-comparable balancing behaviours by deep reinforcement learning","publication_year":2017,"publication_date":"2017-11-01","ids":{"openalex":"https://openalex.org/W2784192398","doi":"https://doi.org/10.1109/humanoids.2017.8246900","mag":"2784192398"},"language":"en","primary_location":{"id":"doi:10.1109/humanoids.2017.8246900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids.2017.8246900","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE-RAS 17th International Conference on Humanoid Robotics (Humanoids)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1809.02074","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chuanyu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chuanyu Yang","raw_affiliation_strings":["School of Informatics, The University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, The University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Taku Komura","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Taku Komura","raw_affiliation_strings":["School of Informatics, The University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, The University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zhibin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhibin Li","raw_affiliation_strings":["School of Informatics, The University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, The University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6563,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.69188814,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"372","last_page":"377"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11685","display_name":"Zebrafish Biomedical Research Applications","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8363000154495239},{"id":"https://openalex.org/keywords/zero-moment-point","display_name":"Zero moment point","score":0.7017999887466431},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.6646999716758728},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.583299994468689},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4681999981403351},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44920000433921814},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.3779999911785126}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8363000154495239},{"id":"https://openalex.org/C19245436","wikidata":"https://www.wikidata.org/wiki/Q279038","display_name":"Zero moment point","level":4,"score":0.7017999887466431},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.6646999716758728},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.583299994468689},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5101000070571899},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4846999943256378},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4681999981403351},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.3714999854564667},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2651999890804291},{"id":"https://openalex.org/C145565327","wikidata":"https://www.wikidata.org/wiki/Q852514","display_name":"Motion control","level":3,"score":0.25220000743865967}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/humanoids.2017.8246900","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids.2017.8246900","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE-RAS 17th International Conference on Humanoid Robotics (Humanoids)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1809.02074","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1809.02074","pdf_url":"https://arxiv.org/pdf/1809.02074","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/f6444164-25b4-4481-b270-a30407351bd6","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/f6444164-25b4-4481-b270-a30407351bd6","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Yang, C, Komura, T & Li, Z 2018, Emergence of Human-comparable Balancing Behaviors by Deep Reinforcement Learning. in 2017 IEEE-RAS International Conference on Humanoid Robots. Institute of Electrical and Electronics Engineers, IEEE-RAS International Conference on Humanoid Robots, Birmingham, United Kingdom, 15/11/17. https://doi.org/10.1109/HUMANOIDS.2017.8246900","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/f6444164-25b4-4481-b270-a30407351bd6","is_oa":true,"landing_page_url":"http://ieeexplore.ieee.org/document/8246900/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Yang, C, Komura, T & Li, Z 2018, Emergence of Human-comparable Balancing Behaviors by Deep Reinforcement Learning. in 2017 IEEE-RAS International Conference on Humanoid Robots. Institute of Electrical and Electronics Engineers, IEEE-RAS International Conference on Humanoid Robots, Birmingham, United Kingdom, 15/11/17. https://doi.org/10.1109/HUMANOIDS.2017.8246900","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1809.02074","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1809.02074","pdf_url":"https://arxiv.org/pdf/1809.02074","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5187574974","display_name":null,"funder_award_id":"1957059","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2014505645","https://openalex.org/W2080421646","https://openalex.org/W2101539915","https://openalex.org/W2109790581","https://openalex.org/W2110215557","https://openalex.org/W2128532136","https://openalex.org/W2145339207","https://openalex.org/W2160365700","https://openalex.org/W2210512403","https://openalex.org/W2460299708","https://openalex.org/W2539534359","https://openalex.org/W2595365649","https://openalex.org/W2739330054","https://openalex.org/W4300892751","https://openalex.org/W6638018090","https://openalex.org/W6684205842","https://openalex.org/W6692846177"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,81,93,109],"hierarchical":[4],"framework":[5],"based":[6,47,106],"on":[7,48],"deep":[8,65],"reinforcement":[9,66,90],"learning":[10,67,91],"that":[11,16,100],"naturally":[12],"acquires":[13],"control":[14,79],"policies":[15],"are":[17],"capable":[18],"of":[19,34,60,71,96,101,112],"performing":[20],"balancing":[21],"behaviours":[22,62],"such":[23],"as":[24],"ankle":[25],"push-offs":[26],"for":[27,39,76],"humanoid":[28,77],"robots,":[29],"without":[30],"explicit":[31],"human":[32],"design":[33],"controllers.":[35],"Only":[36],"the":[37,41,49,64,69,85,102,118],"reward":[38],"training":[40],"neural":[42],"network":[43],"is":[44],"specifically":[45],"formulated":[46],"physical":[50],"principles":[51],"and":[52,54],"quantities,":[53],"hence":[55],"explainable.":[56],"The":[57],"successful":[58],"emergence":[59],"human-comparable":[61],"through":[63],"demonstrates":[68],"feasibility":[70],"using":[72,113],"an":[73],"AI-based":[74],"approach":[75],"motion":[78],"in":[80],"unified":[82],"framework.":[83],"Moreover,":[84],"balance":[86],"strategies":[87],"learned":[88],"by":[89],"provides":[92],"larger":[94],"range":[95],"disturbance":[97],"rejection":[98],"than":[99],"zero":[103],"moment":[104],"point":[105],"methods,":[107],"suggesting":[108],"research":[110],"direction":[111],"learning-based":[114],"controls":[115],"to":[116],"explore":[117],"optimal":[119],"performance.":[120]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2018-01-26T00:00:00"}
