{"id":"https://openalex.org/W2029722408","doi":"https://doi.org/10.1109/icra.2014.6907631","title":"Combining learned controllers to achieve new goals based on linearly solvable MDPs","display_name":"Combining learned controllers to achieve new goals based on linearly solvable MDPs","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W2029722408","doi":"https://doi.org/10.1109/icra.2014.6907631","mag":"2029722408"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2014.6907631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2014.6907631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Eiji Uchibe","raw_affiliation_strings":["Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, Okinawa, Japan","Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, 1919-1 Tancha, Onna-son, Okinawa 904-0495, Japan"],"affiliations":[{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]},{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, 1919-1 Tancha, Onna-son, Okinawa 904-0495, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004840638","display_name":"Kenji Doya","orcid":"https://orcid.org/0000-0002-2446-6820"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kenji Doya","raw_affiliation_strings":["Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, Okinawa, Japan","Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, 1919-1 Tancha, Onna-son, Okinawa 904-0495, Japan"],"affiliations":[{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]},{"raw_affiliation_string":"Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, 1919-1 Tancha, Onna-son, Okinawa 904-0495, Japan","institution_ids":["https://openalex.org/I142637625"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5031054137"],"corresponding_institution_ids":["https://openalex.org/I142637625"],"apc_list":null,"apc_paid":null,"fwci":2.045,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.89076855,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5252","last_page":"5259"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11236","display_name":"Control Systems and Identification","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9614999890327454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hamilton\u2013jacobi\u2013bellman-equation","display_name":"Hamilton\u2013Jacobi\u2013Bellman equation","score":0.8724658489227295},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6305733919143677},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5863887071609497},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5320796966552734},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5306752324104309},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.508126974105835},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4630131423473358},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.44577834010124207},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.4366079568862915},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.43619465827941895},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.42352205514907837},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3870745301246643},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.38474103808403015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34163913130760193},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.322198748588562}],"concepts":[{"id":"https://openalex.org/C196978813","wikidata":"https://www.wikidata.org/wiki/Q3302775","display_name":"Hamilton\u2013Jacobi\u2013Bellman equation","level":3,"score":0.8724658489227295},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6305733919143677},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5863887071609497},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5320796966552734},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5306752324104309},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.508126974105835},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4630131423473358},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.44577834010124207},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.4366079568862915},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.43619465827941895},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.42352205514907837},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3870745301246643},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.38474103808403015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34163913130760193},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.322198748588562},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra.2014.6907631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2014.6907631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1925816294","https://openalex.org/W1966514629","https://openalex.org/W1977114987","https://openalex.org/W1983264093","https://openalex.org/W1997543377","https://openalex.org/W2008026620","https://openalex.org/W2015383196","https://openalex.org/W2026659355","https://openalex.org/W2029946494","https://openalex.org/W2043962331","https://openalex.org/W2054681192","https://openalex.org/W2055647224","https://openalex.org/W2078639473","https://openalex.org/W2081030963","https://openalex.org/W2093524643","https://openalex.org/W2098524868","https://openalex.org/W2102803424","https://openalex.org/W2128152413","https://openalex.org/W2132351269","https://openalex.org/W2141791321","https://openalex.org/W2145060720","https://openalex.org/W2155919457","https://openalex.org/W2161905760","https://openalex.org/W2168342951","https://openalex.org/W2586680856","https://openalex.org/W3005581722","https://openalex.org/W4210791600","https://openalex.org/W4254475186","https://openalex.org/W4285719527","https://openalex.org/W4390708689","https://openalex.org/W6640290305","https://openalex.org/W6654061920","https://openalex.org/W6674872077","https://openalex.org/W6681439324","https://openalex.org/W6683845664"],"related_works":["https://openalex.org/W4239477580","https://openalex.org/W4255265352","https://openalex.org/W2921905705","https://openalex.org/W1632524629","https://openalex.org/W4292330635","https://openalex.org/W2902017027","https://openalex.org/W2766998270","https://openalex.org/W2950982235","https://openalex.org/W4306892756","https://openalex.org/W2052286527"],"abstract_inverted_index":{"Learning":[0],"complicated":[1],"behaviors":[2,129],"usually":[3],"involves":[4],"intensive":[5],"manual":[6],"tuning":[7],"and":[8,78,157],"expensive":[9],"computational":[10],"optimization":[11],"because":[12],"we":[13],"have":[14],"to":[15,42,55,58,96,126,134,147,149,164],"solve":[16],"a":[17,25,38,43,60,65,75,84,90,185],"nonlinear":[18,39],"Hamilton-Jacobi-Bellman":[19],"(HJB)":[20],"equation.":[21,46],"Recently,":[22],"Todorov":[23],"proposed":[24,122],"class":[26],"of":[27,48,67,142,159],"the":[28,49,97,107,116,131,150,155,160,170,178,190],"so-called":[29],"Linearly":[30],"solvable":[31],"Markov":[32],"Decision":[33],"Process":[34],"(LMDP)":[35],"which":[36,93],"converts":[37],"HJB":[40,51],"equation":[41,52],"linear":[44],"differential":[45],"Linearity":[47],"simplified":[50],"allows":[53],"us":[54],"apply":[56],"superposition":[57],"derive":[59],"new":[61,191],"composite":[62,161,179],"controller":[63],"from":[64],"set":[66],"learned":[68],"primitive":[69,144],"controllers.":[70],"However,":[71],"his":[72],"method":[73,92,123],"was":[74,80],"model-based":[76],"approach":[77,165],"it":[79],"not":[81],"evaluated":[82],"in":[83,119,136,154],"real":[85,137],"domain.":[86],"This":[87],"study":[88],"proposes":[89],"model-free":[91],"is":[94,124,146,163],"similar":[95],"Least":[98],"Squares":[99],"Temporal":[100],"Difference":[101],"(LSTD)":[102],"learning.":[103],"In":[104],"this":[105],"method,":[106],"exponentially":[108],"transformed":[109],"cost":[110],"function":[111],"can":[112,181],"be":[113,182],"regarded":[114],"as":[115,184],"discount":[117],"factor":[118],"LSTD.":[120],"Our":[121],"applied":[125],"learning":[127],"walking":[128],"with":[130],"quadruped":[132],"robot":[133,138],"evaluate":[135],"experiments.":[139],"The":[140],"goal":[141],"each":[143],"task":[145,162],"go":[148],"specific":[151],"target":[152,172],"position":[153],"environment":[156],"that":[158,177],"arbitrary":[166],"region":[167],"represented":[168],"by":[169],"primitives'":[171],"positions.":[173],"Experimental":[174],"results":[175],"show":[176],"policy":[180,188],"used":[183],"good":[186],"initial":[187],"for":[189],"task.":[192]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}