{"id":"https://openalex.org/W4392014568","doi":"https://doi.org/10.1109/lra.2024.3368231","title":"Skill-Critic: Refining Learned Skills for Hierarchical Reinforcement Learning","display_name":"Skill-Critic: Refining Learned Skills for Hierarchical Reinforcement Learning","publication_year":2024,"publication_date":"2024-02-21","ids":{"openalex":"https://openalex.org/W4392014568","doi":"https://doi.org/10.1109/lra.2024.3368231"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3368231","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3368231","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102834661","display_name":"Ce Hao","orcid":"https://orcid.org/0009-0000-7653-9713"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ce Hao","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0009-0000-7653-9713","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044517807","display_name":"Catherine Weaver","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Catherine Weaver","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8521-9305","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101867083","display_name":"Chen Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Tang","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-7536-9983","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006805979","display_name":"Kenta Kawamoto","orcid":"https://orcid.org/0000-0001-7276-2766"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kenta Kawamoto","raw_affiliation_strings":["Sony Research Inc., Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0001-7276-2766","affiliations":[{"raw_affiliation_string":"Sony Research Inc., Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064077634","display_name":"Masayoshi Tomizuka","orcid":"https://orcid.org/0000-0003-0206-6639"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Masayoshi Tomizuka","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0206-6639","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101770873","display_name":"Wei Zhan","orcid":"https://orcid.org/0000-0002-1474-1200"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Zhan","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1474-1200","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102834661"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":2.9802,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91703024,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"9","issue":"4","first_page":"3625","last_page":"3632"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/refining","display_name":"Refining (metallurgy)","score":0.8368098735809326},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.679776668548584},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6466116905212402},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45637500286102295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3109254539012909},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2500517666339874},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.12549740076065063},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.06587380170822144},{"id":"https://openalex.org/keywords/metallurgy","display_name":"Metallurgy","score":0.048135221004486084}],"concepts":[{"id":"https://openalex.org/C60044698","wikidata":"https://www.wikidata.org/wiki/Q1283324","display_name":"Refining (metallurgy)","level":2,"score":0.8368098735809326},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.679776668548584},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6466116905212402},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45637500286102295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3109254539012909},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2500517666339874},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.12549740076065063},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.06587380170822144},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.048135221004486084}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3368231","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3368231","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1959608418","https://openalex.org/W1986014385","https://openalex.org/W2109910161","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2964227312","https://openalex.org/W3016525976","https://openalex.org/W3037620198","https://openalex.org/W3068678661","https://openalex.org/W3168892396","https://openalex.org/W3174733757","https://openalex.org/W3206495137","https://openalex.org/W4210870706","https://openalex.org/W4281550413","https://openalex.org/W4286611079","https://openalex.org/W4287756699","https://openalex.org/W4323927473","https://openalex.org/W4401414913","https://openalex.org/W6640963894","https://openalex.org/W6740801417","https://openalex.org/W6747473740","https://openalex.org/W6748566876","https://openalex.org/W6748603076","https://openalex.org/W6759871227","https://openalex.org/W6761754428","https://openalex.org/W6764724164","https://openalex.org/W6767649332","https://openalex.org/W6776601253","https://openalex.org/W6779656125","https://openalex.org/W6779728822","https://openalex.org/W6784712800","https://openalex.org/W6785876034","https://openalex.org/W6810488170","https://openalex.org/W6838968391","https://openalex.org/W6839744550","https://openalex.org/W6845025365","https://openalex.org/W6849619839","https://openalex.org/W6850790624","https://openalex.org/W6850995934","https://openalex.org/W6858048387"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1595345252","https://openalex.org/W2392526918","https://openalex.org/W2362540361","https://openalex.org/W2019560916","https://openalex.org/W2361983698","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2347697528"],"abstract_inverted_index":{"Hierarchical":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"can":[4,52],"accelerate":[5],"long-horizon":[6],"decision-making":[7],"by":[8,99],"temporally":[9],"abstracting":[10],"a":[11,36,63,122],"policy":[12,41,51,76,111,138],"into":[13],"multiple":[14,117],"levels.":[15],"Promising":[16],"results":[17],"in":[18,77,116,128],"sparse":[19],"reward":[20],"environments":[21],"have":[22],"been":[23],"seen":[24],"with":[25,79],"<italic":[26,68],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[27,69],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">skills</i>":[28],",":[29],"i.e.":[30],"sequences":[31],"of":[32],"primitive":[33],"actions.":[34],"Typically,":[35],"skill":[37,81],"latent":[38,101],"space":[39,102],"and":[40,90,97,140],"are":[42,95,143],"discovered":[43],"from":[44,104],"offline":[45,105],"data.":[46],"However,":[47],"the":[48,67,74,88,100,109],"resulting":[49],"low-level":[50,75,89,137],"be":[53],"unreliable":[54],"due":[55],"to":[56,72,107],"low-coverage":[57],"demonstrations":[58,106],"or":[59],"distribution":[60],"shifts.":[61],"As":[62],"solution,":[64],"we":[65],"propose":[66],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Skill-Critic</i>":[70],"algorithm":[71,85],"fine-tune":[73],"conjunction":[78],"high-level":[80,91],"selection.":[82],"Our":[83],"Skill-Critic":[84,115],"optimizes":[86],"both":[87],"policies;":[92],"these":[93],"policies":[94],"initialized":[96],"regularized":[98],"learned":[103],"guide":[108],"parallel":[110],"optimization.":[112],"We":[113],"validate":[114],"sparse-reward":[118,124],"RL":[119],"environments,":[120],"including":[121],"new":[123],"autonomous":[125],"racing":[126],"task":[127],"Gran":[129],"Turismo":[130],"Sport.":[131],"The":[132],"experiments":[133],"show":[134],"that":[135],"Skill-Critic's":[136],"fine-tuning":[139],"demonstration-guided":[141],"regularization":[142],"essential":[144],"for":[145],"good":[146],"performance.":[147]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
