{"id":"https://openalex.org/W4226458162","doi":"https://doi.org/10.1109/lra.2022.3173039","title":"Automating Reinforcement Learning With Example-Based Resets","display_name":"Automating Reinforcement Learning With Example-Based Resets","publication_year":2022,"publication_date":"2022-05-06","ids":{"openalex":"https://openalex.org/W4226458162","doi":"https://doi.org/10.1109/lra.2022.3173039"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2022.3173039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3173039","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2204.02041","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072360628","display_name":"Ji-Gang Kim","orcid":"https://orcid.org/0000-0003-3381-5241"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jigang Kim","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-3381-5241","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032063647","display_name":"J. Hyeon Park","orcid":"https://orcid.org/0000-0001-8410-4568"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"J. hyeon Park","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-8410-4568","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032424238","display_name":"Daesol Cho","orcid":"https://orcid.org/0000-0002-4105-4422"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Daesol Cho","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-4105-4422","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073996122","display_name":"H. Jin Kim","orcid":"https://orcid.org/0000-0002-6819-1136"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"H. Jin Kim","raw_affiliation_strings":["Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","Automation and Systems Research Institute (ASRI), Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-6819-1136","affiliations":[{"raw_affiliation_string":"Department of Mechanical and Aerospace Engineering, Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Automation and Systems Research Institute (ASRI), Seoul, South Korea","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2486,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.82663374,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"7","issue":"3","first_page":"6606","last_page":"6613"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reset","display_name":"Reset (finance)","score":0.8876662850379944},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8678522706031799},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7845780253410339},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6065220832824707},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5987340807914734},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5514624118804932},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5224993824958801},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.4200475215911865},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09656468033790588}],"concepts":[{"id":"https://openalex.org/C2779795794","wikidata":"https://www.wikidata.org/wiki/Q7315343","display_name":"Reset (finance)","level":2,"score":0.8876662850379944},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8678522706031799},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7845780253410339},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6065220832824707},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5987340807914734},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5514624118804932},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5224993824958801},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.4200475215911865},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09656468033790588},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C106159729","wikidata":"https://www.wikidata.org/wiki/Q2294553","display_name":"Financial economics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2022.3173039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2022.3173039","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2204.02041","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.02041","pdf_url":"https://arxiv.org/pdf/2204.02041","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2204.02041","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.02041","pdf_url":"https://arxiv.org/pdf/2204.02041","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7426400328","display_name":null,"funder_award_id":"202013D05","funder_id":"https://openalex.org/F4320322014","funder_display_name":"Ministry of Food and Drug Safety"}],"funders":[{"id":"https://openalex.org/F4320318847","display_name":"Korea Medical Device Development Fund","ror":null},{"id":"https://openalex.org/F4320321681","display_name":"Ministry of Trade, Industry and Energy","ror":"https://ror.org/008nkqk13"},{"id":"https://openalex.org/F4320322014","display_name":"Ministry of Food and Drug Safety","ror":"https://ror.org/01f7dp456"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W2201581102","https://openalex.org/W2221677593","https://openalex.org/W2342662072","https://openalex.org/W2529658650","https://openalex.org/W2575705757","https://openalex.org/W2781585732","https://openalex.org/W2806130867","https://openalex.org/W2807588596","https://openalex.org/W2810785043","https://openalex.org/W2963092423","https://openalex.org/W2963311874","https://openalex.org/W2963780574","https://openalex.org/W2963864421","https://openalex.org/W2982316857","https://openalex.org/W2990747716","https://openalex.org/W2995372087","https://openalex.org/W2996037775","https://openalex.org/W3007553593","https://openalex.org/W3008970887","https://openalex.org/W3020712699","https://openalex.org/W3038298277","https://openalex.org/W3040408533","https://openalex.org/W3091677803","https://openalex.org/W3105810795","https://openalex.org/W3150718622","https://openalex.org/W3206200647","https://openalex.org/W4287066693","https://openalex.org/W4287123092","https://openalex.org/W4287257196","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6704571135","https://openalex.org/W6741302124","https://openalex.org/W6746595151","https://openalex.org/W6747387971","https://openalex.org/W6752244597","https://openalex.org/W6752338937","https://openalex.org/W6771179988","https://openalex.org/W6771771425","https://openalex.org/W6772005887","https://openalex.org/W6774126978","https://openalex.org/W6774967489","https://openalex.org/W6780559895","https://openalex.org/W6785599724","https://openalex.org/W6792301857","https://openalex.org/W6796869622","https://openalex.org/W6802752636"],"related_works":["https://openalex.org/W350273603","https://openalex.org/W2393495588","https://openalex.org/W96259911","https://openalex.org/W2168225754","https://openalex.org/W4385608460","https://openalex.org/W4231704780","https://openalex.org/W2370772865","https://openalex.org/W1528611913","https://openalex.org/W2000034628","https://openalex.org/W2387487224"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,21,99],"learning":[2,22,100],"has":[3],"enabled":[4],"robots":[5],"to":[6,15,33,45,64,97,111,124,141,163,173],"learn":[7,142],"motor":[8],"skills":[9],"from":[10,50,143],"environmental":[11],"interactions":[12],"with":[13],"minimal":[14],"no":[16],"prior":[17],"knowledge.":[18],"However,":[19],"existing":[20],"algorithms":[23],"assume":[24],"an":[25,95,106],"episodic":[26],"setting,":[27],"in":[28,71,113],"which":[29,82],"the":[30,40,48,84,134,158,170],"agent":[31,108,119,160],"resets":[32,127,166],"a":[34,114,122,131,146],"fixed":[35],"initial":[36],"state":[37],"distribution":[38],"at":[39],"end":[41],"of":[42,86,148],"each":[43],"episode,":[44],"successfully":[46,161],"train":[47],"agents":[49],"repeated":[51],"trials.":[52],"Such":[53],"reset":[54,112,118,123,159],"mechanism,":[55],"while":[56],"trivial":[57],"for":[58,66,133],"simulated":[59,149],"tasks,":[60],"can":[61],"be":[62],"challenging":[63],"provide":[65],"real-world":[67,151],"robotics":[68],"tasks.":[69],"Resets":[70],"robotic":[72],"systems":[73],"often":[74],"require":[75],"extensive":[76],"human":[77],"supervision":[78],"and":[79,128,150,155],"task-specific":[80],"workarounds,":[81],"contradicts":[83],"goal":[85],"autonomous":[87],"robot":[88],"learning.":[89],"In":[90],"this":[91],"paper,":[92],"we":[93],"propose":[94],"extension":[96],"conventional":[98],"towards":[101],"greater":[102],"autonomy":[103],"by":[104],"introducing":[105],"additional":[107],"that":[109,157],"learns":[110,162],"self-supervised":[115],"manner.":[116],"The":[117],"preemptively":[120],"triggers":[121],"prevent":[125],"manual":[126,165],"implicitly":[129],"imposes":[130],"curriculum":[132],"forward":[135,171],"agent.":[136],"We":[137],"apply":[138],"our":[139],"method":[140],"scratch":[144],"on":[145],"suite":[147],"continuous":[152],"control":[153],"tasks":[154],"demonstrate":[156],"reduce":[164],"whilst":[167],"also":[168],"allowing":[169],"policy":[172],"improve":[174],"gradually":[175],"over":[176],"time.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
