{"id":"https://openalex.org/W2796496570","doi":"https://doi.org/10.1109/iros.2018.8593588","title":"Model-Based Action Exploration for Learning Dynamic Motion Skills","display_name":"Model-Based Action Exploration for Learning Dynamic Motion Skills","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2796496570","doi":"https://doi.org/10.1109/iros.2018.8593588","mag":"2796496570"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2018.8593588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2018.8593588","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045351810","display_name":"Glen Berseth","orcid":"https://orcid.org/0000-0001-7351-8028"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Glen Berseth","raw_affiliation_strings":["Faculty of Computer Science, University of British Columbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087713896","display_name":"Alex Kyriazis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alex Kyriazis","raw_affiliation_strings":["The University of British Columbia Faculty of Medicine, Vancouver, BC, CA"],"affiliations":[{"raw_affiliation_string":"The University of British Columbia Faculty of Medicine, Vancouver, BC, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001813872","display_name":"Ivan Zinin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ivan Zinin","raw_affiliation_strings":["The University of British Columbia Faculty of Medicine, Vancouver, BC, CA"],"affiliations":[{"raw_affiliation_string":"The University of British Columbia Faculty of Medicine, Vancouver, BC, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109401104","display_name":"William Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"William Choi","raw_affiliation_strings":["The University of British Columbia Faculty of Medicine, Vancouver, BC, CA"],"affiliations":[{"raw_affiliation_string":"The University of British Columbia Faculty of Medicine, Vancouver, BC, CA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052748246","display_name":"Michiel van de Panne","orcid":"https://orcid.org/0000-0002-9123-3672"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Michiel van de Panne","raw_affiliation_strings":["Faculty of Computer Science, University of British Columbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5045351810"],"corresponding_institution_ids":["https://openalex.org/I141945490"],"apc_list":null,"apc_paid":null,"fwci":0.3258,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.65812499,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"abs 1502 3167","issue":null,"first_page":"1540","last_page":"1546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.753399670124054},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7109411954879761},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7019116878509521},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6782245635986328},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.612378716468811},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5634007453918457},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5083717703819275},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4811462163925171}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.753399670124054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7109411954879761},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7019116878509521},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6782245635986328},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.612378716468811},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5634007453918457},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5083717703819275},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4811462163925171},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros.2018.8593588","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2018.8593588","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W755046805","https://openalex.org/W1191599655","https://openalex.org/W1491843047","https://openalex.org/W1771410628","https://openalex.org/W1836465849","https://openalex.org/W1980035368","https://openalex.org/W2072752020","https://openalex.org/W2099471712","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2165150801","https://openalex.org/W2173248099","https://openalex.org/W2258731934","https://openalex.org/W2290354866","https://openalex.org/W2514775068","https://openalex.org/W2534060593","https://openalex.org/W2546975091","https://openalex.org/W2558634851","https://openalex.org/W2567374473","https://openalex.org/W2595180411","https://openalex.org/W2604960773","https://openalex.org/W2623491082","https://openalex.org/W2724169821","https://openalex.org/W2726187156","https://openalex.org/W2736601468","https://openalex.org/W2739330054","https://openalex.org/W2756350131","https://openalex.org/W2949117887","https://openalex.org/W2950471160","https://openalex.org/W2962872206","https://openalex.org/W2963073614","https://openalex.org/W2963146015","https://openalex.org/W2963446712","https://openalex.org/W2963604043","https://openalex.org/W2963616477","https://openalex.org/W2963864421","https://openalex.org/W2964006217","https://openalex.org/W2964043796","https://openalex.org/W2964174623","https://openalex.org/W2964294881","https://openalex.org/W2974778612","https://openalex.org/W3021208093","https://openalex.org/W4214717370","https://openalex.org/W4294578467","https://openalex.org/W4295766944","https://openalex.org/W4300892751","https://openalex.org/W4302570325","https://openalex.org/W4320013936","https://openalex.org/W6639949747","https://openalex.org/W6683204974","https://openalex.org/W6684205842","https://openalex.org/W6696324988"],"related_works":["https://openalex.org/W4296209631","https://openalex.org/W4306904969","https://openalex.org/W3097449145","https://openalex.org/W2561617217","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W4311431118","https://openalex.org/W4382240674"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,100,162],"has":[3,15,30],"achieved":[4],"great":[5],"strides":[6],"in":[7],"solving":[8],"challenging":[9],"motion":[10],"control":[11],"tasks.":[12],"Recently,":[13],"there":[14,29],"been":[16,31],"significant":[17],"work":[18,33],"on":[19,34,96],"methods":[20,73],"for":[21,52],"exploiting":[22],"the":[23,39,48,64,84,87,107,125,156],"data":[24,40],"gathered":[25],"during":[26],"training,":[27],"but":[28],"less":[32],"how":[35],"to":[36,41,94,105,169],"best":[37],"generate":[38],"learn":[42],"from.":[43],"For":[44],"continuous":[45],"action":[46,66,88,158],"domains,":[47],"most":[49],"common":[50],"method":[51,154],"generating":[53],"exploratory":[54,157],"actions":[55,144],"involves":[56],"sampling":[57],"from":[58],"a":[59,69,101,116,121,148],"Gaussian":[60],"distribution":[61],"centred":[62],"around":[63],"mean":[65],"output":[67],"by":[68],"policy.":[70],"Although":[71],"these":[72],"can":[74,91],"be":[75,92],"quite":[76],"capable,":[77],"they":[78],"do":[79],"not":[80],"scale":[81],"well":[82],"with":[83],"dimensionality":[85],"of":[86,114,124,140,151],"space,":[89,159],"and":[90,142,164,176],"dangerous":[93],"apply":[95],"hardware.":[97],"We":[98],"consider":[99],"forward":[102],"dynamics":[103],"model":[104,134],"predict":[106],"result,":[108],"(x":[109,127],"<sub":[110,128],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[111,129],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">t+1</sub>":[112],"),":[113],"taking":[115],"particular":[117],"action,":[118],"(u),":[119],"given":[120],"specific":[122],"observation":[123],"state,":[126],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">t</sub>":[130],").":[131],"With":[132],"this":[133],"we":[135,145],"perform":[136],"internal":[137],"lookahead":[138],"predictions":[139],"outcomes":[141],"seek":[143],"believe":[146],"have":[147],"reasonable":[149],"chance":[150],"success.":[152],"This":[153],"alters":[155],"thereby":[160],"increasing":[161],"speed":[163],"enables":[165],"higher":[166],"quality":[167],"solutions":[168],"difficult":[170],"problems,":[171],"such":[172],"as":[173],"robotic":[174],"locomotion":[175],"juggling.":[177]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
