{"id":"https://openalex.org/W3206746211","doi":"https://doi.org/10.1109/icra48506.2021.9561017","title":"Distilling a Hierarchical Policy for Planning and Control via Representation and Reinforcement Learning","display_name":"Distilling a Hierarchical Policy for Planning and Control via Representation and Reinforcement Learning","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3206746211","doi":"https://doi.org/10.1109/icra48506.2021.9561017","mag":"3206746211"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003306942","display_name":"Jung-Su Ha","orcid":"https://orcid.org/0000-0002-1024-4119"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jung-Su Ha","raw_affiliation_strings":["MPI for Intelligent Systems","TU Berlin"],"affiliations":[{"raw_affiliation_string":"MPI for Intelligent Systems","institution_ids":[]},{"raw_affiliation_string":"TU Berlin","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624080","display_name":"Youngjin Park","orcid":"https://orcid.org/0000-0002-7426-0142"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Young-Jin Park","raw_affiliation_strings":["NAVER CLOVA, NAVER Corp"],"affiliations":[{"raw_affiliation_string":"NAVER CLOVA, NAVER Corp","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090276849","display_name":"Hyeok-Joo Chae","orcid":"https://orcid.org/0000-0002-3028-0010"},"institutions":[{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Hyeok-Joo Chae","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048997069","display_name":"Soon-Seo Park","orcid":null},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Soon-Seo Park","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049291168","display_name":"Han\u2010Lim Choi","orcid":"https://orcid.org/0000-0003-3985-0419"},"institutions":[{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Han-Lim Choi","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003306942"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69289748,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"550","issue":null,"first_page":"4459","last_page":"4466"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11685","display_name":"Zebrafish Biomedical Research Applications","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8315093517303467},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7561198472976685},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.7096555829048157},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6865794062614441},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.626051127910614},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5785865187644958},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5656017065048218},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5514501333236694},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.5481989979743958},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.4863074719905853},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4702640473842621},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4513879120349884},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4282296597957611},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.34361571073532104},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10508909821510315},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0756756067276001}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8315093517303467},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7561198472976685},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.7096555829048157},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6865794062614441},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.626051127910614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5785865187644958},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5656017065048218},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5514501333236694},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.5481989979743958},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.4863074719905853},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4702640473842621},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4513879120349884},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4282296597957611},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34361571073532104},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10508909821510315},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0756756067276001},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra48506.2021.9561017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1424654272","https://openalex.org/W1637456626","https://openalex.org/W1752690783","https://openalex.org/W1959608418","https://openalex.org/W2018705428","https://openalex.org/W2029776687","https://openalex.org/W2098774185","https://openalex.org/W2107464055","https://openalex.org/W2145339207","https://openalex.org/W2155772159","https://openalex.org/W2188365844","https://openalex.org/W2296360731","https://openalex.org/W2606433045","https://openalex.org/W2732671178","https://openalex.org/W2736601468","https://openalex.org/W2736887377","https://openalex.org/W2738778707","https://openalex.org/W2751471435","https://openalex.org/W2766447205","https://openalex.org/W2781726626","https://openalex.org/W2785342287","https://openalex.org/W2788904251","https://openalex.org/W2796290181","https://openalex.org/W2799151646","https://openalex.org/W2803281228","https://openalex.org/W2806098286","https://openalex.org/W2882052894","https://openalex.org/W2883332136","https://openalex.org/W2889731659","https://openalex.org/W2890208753","https://openalex.org/W2900152462","https://openalex.org/W2900879308","https://openalex.org/W2902711054","https://openalex.org/W2918645572","https://openalex.org/W2920362155","https://openalex.org/W2950614095","https://openalex.org/W2954142106","https://openalex.org/W2963166838","https://openalex.org/W2963286043","https://openalex.org/W2963344681","https://openalex.org/W2963414638","https://openalex.org/W2963438456","https://openalex.org/W2963722050","https://openalex.org/W2963960193","https://openalex.org/W2964077562","https://openalex.org/W2964232608","https://openalex.org/W2964338130","https://openalex.org/W2970990801","https://openalex.org/W3021208093","https://openalex.org/W3031840745","https://openalex.org/W3034888459","https://openalex.org/W3093010610","https://openalex.org/W3102334304","https://openalex.org/W3103834977","https://openalex.org/W3104515094","https://openalex.org/W4255064568","https://openalex.org/W4288021424","https://openalex.org/W4288289109","https://openalex.org/W4288348579","https://openalex.org/W4289294484","https://openalex.org/W4298206671","https://openalex.org/W6640963894","https://openalex.org/W6674884181","https://openalex.org/W6687045409","https://openalex.org/W6692405165","https://openalex.org/W6711952718","https://openalex.org/W6728354068","https://openalex.org/W6736368053","https://openalex.org/W6740792189","https://openalex.org/W6741002519","https://openalex.org/W6744063608","https://openalex.org/W6747473740","https://openalex.org/W6748566876","https://openalex.org/W6748600884","https://openalex.org/W6748603076","https://openalex.org/W6749821205","https://openalex.org/W6750186571","https://openalex.org/W6751494529","https://openalex.org/W6752089545","https://openalex.org/W6754184789","https://openalex.org/W6754302822","https://openalex.org/W6756168450","https://openalex.org/W6756256016","https://openalex.org/W6756675569","https://openalex.org/W6760300019","https://openalex.org/W6760405395","https://openalex.org/W6760439459","https://openalex.org/W6764724164","https://openalex.org/W6767649332","https://openalex.org/W6772121735","https://openalex.org/W6841393079"],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2778153218","https://openalex.org/W1531601525","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886"],"abstract_inverted_index":{"We":[0,138],"present":[1],"a":[2,19,38,42,80,85,99,155,167],"hierarchical":[3,39],"planning":[4,81,114],"and":[5,16,48,97,148,152,159,163,173],"control":[6,101],"framework":[7,52,142],"that":[8,62,83,90,107],"enables":[9],"an":[10,26,94],"agent":[11],"to":[12,18,93,128,132,180],"perform":[13],"various":[14],"tasks":[15,45,134],"adapt":[17,131],"new":[20,133],"task":[21],"flexibly.":[22],"Rather":[23],"than":[24],"learning":[25],"individual":[27],"policy":[28,40,72,123,176],"for":[29,154],"each":[30],"particular":[31],"task,":[32],"the":[33,56,105,109,113,121,140,174],"proposed":[34,141],"framework,":[35],"DISH,":[36],"distills":[37],"from":[41],"set":[43],"of":[44,58,74,77,87,170,183],"by":[46],"representation":[47],"reinforcement":[49],"learning.":[50],"The":[51,70],"is":[53,115,177],"based":[54],"on":[55],"idea":[57],"latent":[59,68,88,119,150],"variable":[60],"models":[61],"represent":[63],"high-dimensional":[64],"observations":[65],"using":[66],"low-dimensional":[67,118],"variables.":[69],"resulting":[71,175],"consists":[73],"two":[75],"levels":[76],"hierarchy:":[78],"(i)":[79],"module":[82],"reasons":[84],"sequence":[86],"intentions":[89],"would":[91],"lead":[92],"optimistic":[95],"future":[96],"(ii)":[98],"feedback":[100],"policy,":[102],"shared":[103],"across":[104],"tasks,":[106,172,184],"executes":[108],"inferred":[110],"intention.":[111],"Because":[112],"performed":[116],"in":[117,187],"space,":[120],"learned":[122],"can":[124,143],"immediately":[125],"be":[126],"used":[127],"solve":[129],"or":[130],"without":[135],"additional":[136],"training.":[137],"demonstrate":[139],"learn":[144],"compact":[145],"representations":[146],"(3-":[147],"1-dimensional":[149],"states":[151],"commands":[153],"humanoid":[156],"with":[157],"197-":[158],"36-dimensional":[160],"state":[161],"features":[162],"actions)":[164],"while":[165],"solving":[166],"small":[168],"number":[169],"imitation":[171],"directly":[178],"applicable":[179],"other":[181],"types":[182],"i.e.,":[185],"navigation":[186],"cluttered":[188],"environments.":[189]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
