{"id":"https://openalex.org/W2271318666","doi":"https://doi.org/10.1007/s10015-015-0260-7","title":"EM-based policy hyper parameter exploration: application to standing and balancing of a two-wheeled smartphone robot","display_name":"EM-based policy hyper parameter exploration: application to standing and balancing of a two-wheeled smartphone robot","publication_year":2016,"publication_date":"2016-01-25","ids":{"openalex":"https://openalex.org/W2271318666","doi":"https://doi.org/10.1007/s10015-015-0260-7","mag":"2271318666"},"language":"en","primary_location":{"id":"doi:10.1007/s10015-015-0260-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10015-015-0260-7","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf","source":{"id":"https://openalex.org/S104439334","display_name":"Artificial Life and Robotics","issn_l":"1433-5298","issn":["1433-5298","1614-7456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Life and Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002996776","display_name":"Jiexin Wang","orcid":"https://orcid.org/0000-0002-3286-3711"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Jiexin Wang","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eiji Uchibe","raw_affiliation_strings":["Okinawa Institute of Science and Technology, Okinawa, Japan"],"affiliations":[{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004840638","display_name":"Kenji Doya","orcid":"https://orcid.org/0000-0002-2446-6820"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kenji Doya","raw_affiliation_strings":["Kyoto University, Kyoto, Japan","Okinawa Institute of Science and Technology, Okinawa, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Okinawa Institute of Science and Technology, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5002996776"],"corresponding_institution_ids":["https://openalex.org/I22299242"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.4417,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.77715907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"21","issue":"1","first_page":"125","last_page":"131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7577121257781982},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6185025572776794},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.566453754901886},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5551007986068726},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.5216389298439026},{"id":"https://openalex.org/keywords/swing","display_name":"Swing","score":0.5195139646530151},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5015320777893066},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4630138576030731},{"id":"https://openalex.org/keywords/classification-of-discontinuities","display_name":"Classification of discontinuities","score":0.4224073886871338},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.41631895303726196},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3537144064903259},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.33239758014678955},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19020554423332214},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.14719849824905396},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.1406615674495697}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7577121257781982},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6185025572776794},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.566453754901886},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5551007986068726},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.5216389298439026},{"id":"https://openalex.org/C65655974","wikidata":"https://www.wikidata.org/wiki/Q14867674","display_name":"Swing","level":2,"score":0.5195139646530151},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5015320777893066},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4630138576030731},{"id":"https://openalex.org/C15627037","wikidata":"https://www.wikidata.org/wiki/Q541961","display_name":"Classification of discontinuities","level":2,"score":0.4224073886871338},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.41631895303726196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3537144064903259},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.33239758014678955},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19020554423332214},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.14719849824905396},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.1406615674495697},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10015-015-0260-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10015-015-0260-7","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf","source":{"id":"https://openalex.org/S104439334","display_name":"Artificial Life and Robotics","issn_l":"1433-5298","issn":["1433-5298","1614-7456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Life and Robotics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10015-015-0260-7","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10015-015-0260-7","pdf_url":"https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf","source":{"id":"https://openalex.org/S104439334","display_name":"Artificial Life and Robotics","issn_l":"1433-5298","issn":["1433-5298","1614-7456"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Life and Robotics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1939940645","display_name":"\u671d\u9bae\u8a9e\u8af8\u65b9\u8a00\u306e\u751f\u6210\u30a2\u30af\u30bb\u30f3\u30c8\u8ad6\u7684\u7814\u7a76","funder_award_id":"261207","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4652398879","display_name":"\u30c1\u30d9\u30c3\u30c8\u5bc6\u6559\u3068\u65e5\u672c\u5bc6\u6559\u306e\u5b97\u6559\u5100\u793c\u306b\u95a2\u3059\u308b\u6bd4\u8f03\u7814\u7a76","funder_award_id":"245002","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4789093758","display_name":null,"funder_award_id":"MEXT/JSPS","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4996570636","display_name":"\u4e0d\u898f\u5247\u8377\u91cd\u6b6f\u8eca\u75b2\u52b4\u8a66\u9a13\u6a5f\u306e\u8a66\u4f5c\u7814\u7a76","funder_award_id":"50024","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5807290578","display_name":"Information theoretic optimization of intrinsic rewards for reinforcement learning","funder_award_id":"24500249","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G710017384","display_name":"\u30ed\u30b1\u30c3\u30c8\u306e\u69cb\u9020\u89e3\u6790\u304a\u3088\u3073\u69cb\u9020\u6700\u9069\u5316\u306b\u95a2\u3059\u308b\u7814\u7a76","funder_award_id":"45002","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8109110001","display_name":"\u7adc\u9020\u5bfa\u6c0f\u306e\u9818\u56fd\u3088\u308a\u521d\u671f\u306e\u4f50\u8cc0\u85e9\u306b\u81f3\u308b\u9593\u306e\u6b66\u5bb6\u793e\u4f1a\u306b\u304a\u3051\u308b\u76f8\u7d9a\u5236\u3068\u77e5\u884c\u306e\u5f62\u614b\u306b\u3064\u3044\u3066","funder_award_id":"12072","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8726382939","display_name":"\u30d6\u30eb\u30fc\u30d9\u30ea\u30fc\u306e\u751f\u7523\u958b\u767a\u306b\u95a2\u3059\u308b\u7814\u7a76","funder_award_id":"61207","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320320912","display_name":"Ministry of Education, Culture, Sports, Science and Technology","ror":"https://ror.org/048rj2z13"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2271318666.pdf","grobid_xml":"https://content.openalex.org/works/W2271318666.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1925816294","https://openalex.org/W1971492381","https://openalex.org/W1989144314","https://openalex.org/W2012587148","https://openalex.org/W2109169869","https://openalex.org/W2112036188","https://openalex.org/W2113501460","https://openalex.org/W2119717200","https://openalex.org/W2127107099","https://openalex.org/W2137104525","https://openalex.org/W2146737184"],"related_works":["https://openalex.org/W2360051520","https://openalex.org/W2798244654","https://openalex.org/W3168108534","https://openalex.org/W34871393","https://openalex.org/W1972096828","https://openalex.org/W1486689224","https://openalex.org/W2529137940","https://openalex.org/W2614575562","https://openalex.org/W2372645633","https://openalex.org/W2689391174"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,35,47,108,127],"novel":[4],"policy":[5,37,43,53,65],"search":[6],"algorithm":[7],"called":[8],"EM-based":[9,28,61],"Policy":[10,21],"Hyper":[11],"Parameter":[12,24],"Exploration":[13,25],"(EPHE)":[14],"which":[15],"integrates":[16],"two":[17],"reinforcement":[18],"learning":[19,81,120,122],"algorithms:":[20],"Gradient":[22],"with":[23,41,129],"(PGPE)":[26],"and":[27,57,77,101,105],"Reward-Weighted":[29,62],"Regression.":[30],"Like":[31],"PGPE,":[32],"EPHE":[33,116],"evaluates":[34],"deterministic":[36],"in":[38,91],"each":[39],"episode":[40],"the":[42,52,64,80,92],"parameters":[44,55,67],"sampled":[45],"from":[46],"prior":[48],"distribution":[49],"given":[50],"by":[51,70],"hyper":[54,66],"(mean":[56],"variance).":[58],"Based":[59],"on":[60],"Regression,":[63],"are":[68,83],"updated":[69],"reward-weighted":[71],"averaging":[72],"so":[73],"that":[74,115],"gradient":[75],"calculation":[76],"tuning":[78,124],"of":[79,94,103,107],"rate":[82,123],"not":[84],"required.":[85],"The":[86],"proposed":[87],"method":[88],"is":[89],"tested":[90],"benchmarks":[93],"pendulum":[95],"swing-up":[96],"task,":[97],"cart-pole":[98],"balancing":[99,106],"task":[100,128],"simulation":[102],"standing":[104],"two-wheeled":[109],"smartphone":[110],"robot.":[111],"Experimental":[112],"results":[113],"show":[114],"can":[117],"achieve":[118],"efficient":[119],"without":[121],"even":[125],"for":[126],"discontinuities.":[130]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
