{"id":"https://openalex.org/W4225304231","doi":"https://doi.org/10.1109/lra.2022.3171915","title":"Unsupervised Reinforcement Learning for Transferable Manipulation Skill Discovery","display_name":"Unsupervised Reinforcement Learning for Transferable Manipulation Skill Discovery","publication_year":2022,"publication_date":"2022-05-06","ids":{"openalex":"https://openalex.org/W4225304231","doi":"https://doi.org/10.1109/lra.2022.3171915"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2022.3171915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3171915","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2204.13906","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032424238","display_name":"Daesol Cho","orcid":"https://orcid.org/0000-0002-4105-4422"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Daesol Cho","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-4105-4422","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072360628","display_name":"Ji-Gang Kim","orcid":"https://orcid.org/0000-0003-3381-5241"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jigang Kim","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-3381-5241","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073996122","display_name":"H. Jin Kim","orcid":"https://orcid.org/0000-0002-6819-1136"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"H. Jin Kim","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-6819-1136","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2197,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.89323333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"7","issue":"3","first_page":"7455","last_page":"7462"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7926785945892334},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7316532731056213},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7262555956840515},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.636650562286377},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4730871319770813},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4689934551715851},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.4543839693069458},{"id":"https://openalex.org/keywords/transferable-skills-analysis","display_name":"Transferable skills analysis","score":0.4401891231536865},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.422462522983551},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3904508948326111},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.35096102952957153},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08529797196388245}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7926785945892334},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7316532731056213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7262555956840515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.636650562286377},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4730871319770813},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4689934551715851},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.4543839693069458},{"id":"https://openalex.org/C20574239","wikidata":"https://www.wikidata.org/wiki/Q7834033","display_name":"Transferable skills analysis","level":3,"score":0.4401891231536865},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.422462522983551},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3904508948326111},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.35096102952957153},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08529797196388245},{"id":"https://openalex.org/C120912362","wikidata":"https://www.wikidata.org/wiki/Q136822","display_name":"Higher education","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2022.3171915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3171915","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2204.13906","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.13906","pdf_url":"https://arxiv.org/pdf/2204.13906","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2204.13906","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.13906","pdf_url":"https://arxiv.org/pdf/2204.13906","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4699999988079071,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323103","display_name":"Agency for Defense Development","ror":"https://ror.org/05fhe0r85"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2575705757","https://openalex.org/W2963276097","https://openalex.org/W2963523627","https://openalex.org/W2964262254","https://openalex.org/W2996490626","https://openalex.org/W3005053211","https://openalex.org/W3040408533","https://openalex.org/W4319988532","https://openalex.org/W6717230150","https://openalex.org/W6717434760","https://openalex.org/W6734325300","https://openalex.org/W6735033012","https://openalex.org/W6739901393","https://openalex.org/W6740801417","https://openalex.org/W6747473740","https://openalex.org/W6748603076","https://openalex.org/W6756303580","https://openalex.org/W6756684005","https://openalex.org/W6762863188","https://openalex.org/W6764724164","https://openalex.org/W6768810269","https://openalex.org/W6769596995","https://openalex.org/W6771619369","https://openalex.org/W6771876938","https://openalex.org/W6771912484","https://openalex.org/W6774179783","https://openalex.org/W6775647304","https://openalex.org/W6780145335","https://openalex.org/W6784521679","https://openalex.org/W6789191384","https://openalex.org/W6841393079","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W2516534248","https://openalex.org/W2591217073","https://openalex.org/W2981850188","https://openalex.org/W4390949054","https://openalex.org/W2730711084","https://openalex.org/W4306904969","https://openalex.org/W2888402452","https://openalex.org/W4387816613","https://openalex.org/W3203657119","https://openalex.org/W4286952720"],"abstract_inverted_index":{"Current":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"in":[4,9,33,61,142],"robotics":[5],"often":[6],"experiences":[7],"difficulty":[8],"generalizing":[10],"to":[11,16,39,91,105,111,113,148],"new":[12],"downstream":[13,115,143],"tasks":[14,117,144],"due":[15],"the":[17,31,40,89,95,99,106,119,132,146],"innate":[18],"task-specific":[19,41],"training":[20],"paradigm.":[21],"To":[22],"alleviate":[23],"it,":[24],"unsupervised":[25,67],"RL,":[26],"a":[27,34],"framework":[28],"that":[29,74,128],"pre-trains":[30],"agent":[32,90],"task-agnostic":[35,121],"manner":[36],"without":[37,103],"access":[38,104],"reward,":[42,108],"leverages":[43],"active":[44],"exploration":[45,77],"for":[46,69],"distilling":[47],"diverse":[48,134],"experience":[49],"into":[50],"essential":[51],"skills":[52],"or":[53],"reusable":[54],"knowledge.":[55],"For":[56],"exploiting":[57],"such":[58],"benefits":[59],"also":[60,110],"robotic":[62,100],"manipulation,":[63],"we":[64,126],"propose":[65],"an":[66],"method":[68],"transferable":[70,82],"manipulation":[71,101,116],"skill":[72,83],"discovery":[73],"ties":[75],"structured":[76],"toward":[78],"interacting":[79,135],"behavior":[80,136],"and":[81,137],"learning.":[84],"It":[85],"not":[86],"only":[87],"enables":[88],"learn":[92],"interaction":[93],"behavior,":[94],"key":[96],"aspect":[97],"of":[98],"learning,":[102],"environment":[107],"but":[109],"generalize":[112],"arbitrary":[114],"with":[118],"learned":[120],"skills.":[122],"Through":[123],"comparative":[124],"experiments,":[125],"show":[127],"our":[129],"approach":[130],"achieves":[131],"most":[133],"significantly":[138],"improves":[139],"sample":[140],"efficiency":[141],"including":[145],"extension":[147],"multi-object,":[149],"multitask":[150],"problems.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-05-05T00:00:00"}
