{"id":"https://openalex.org/W4416750449","doi":"https://doi.org/10.1109/iros60139.2025.11246668","title":"SimLauncher: Launching Sample-Efficient Real-World Robotic Reinforcement Learning via Simulation Pre-Training","display_name":"SimLauncher: Launching Sample-Efficient Real-World Robotic Reinforcement Learning via Simulation Pre-Training","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750449","doi":"https://doi.org/10.1109/iros60139.2025.11246668"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246668","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102718289","display_name":"Mingdong Wu","orcid":"https://orcid.org/0009-0007-9120-4621"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingdong Wu","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113890422","display_name":"Lehong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lehong Wu","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101293264","display_name":"Yizhuo Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yizhuo Wu","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055500263","display_name":"Weiyao Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiyao Huang","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039486473","display_name":"Hongwei Fan","orcid":"https://orcid.org/0000-0003-0294-8869"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongwei Fan","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102652325","display_name":"Zheyuan Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheyuan Hu","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101608437","display_name":"Haoran Geng","orcid":"https://orcid.org/0000-0001-8375-7196"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoran Geng","raw_affiliation_strings":["University of California,Berkeley"],"affiliations":[{"raw_affiliation_string":"University of California,Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001998141","display_name":"Jinzhou Li","orcid":"https://orcid.org/0000-0001-8555-1714"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinzhou Li","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiahe Ying","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahe Ying","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Long Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Yang","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018250214","display_name":"Yuanpei Chen","orcid":"https://orcid.org/0000-0002-0033-492X"},"institutions":[{"id":"https://openalex.org/I4210142695","display_name":"American Jewish Joint Distribution Committee","ror":"https://ror.org/040an9w95","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210142695"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuanpei Chen","raw_affiliation_strings":["PKU-Psibot Joint Lab"],"affiliations":[{"raw_affiliation_string":"PKU-Psibot Joint Lab","institution_ids":["https://openalex.org/I4210142695"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389349","display_name":"Dong Hao","orcid":"https://orcid.org/0000-0002-1476-2861"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Dong","raw_affiliation_strings":["Peking University,Center on Frontiers of Computing Studies, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,Center on Frontiers of Computing Studies, School of Computer Science","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5102718289"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19055847,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7933","last_page":"7940"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5127000212669373,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5127000212669373,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.23720000684261322,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10982","display_name":"Motor Control and Adaptation","score":0.08810000121593475,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7964000105857849},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6140000224113464},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.42570000886917114},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4106000065803528},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.40369999408721924},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.3919999897480011},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.3781000077724457}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7964000105857849},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7105000019073486},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6140000224113464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5559999942779541},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.42570000886917114},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4106000065803528},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.40369999408721924},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3919999897480011},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.3781000077724457},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36239999532699585},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.35760000348091125},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.3133000135421753},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.30660000443458557},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.26080000400543213}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246668","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2158782408","https://openalex.org/W2963411833","https://openalex.org/W2968268581","https://openalex.org/W3088310808","https://openalex.org/W3206200647","https://openalex.org/W4383108454","https://openalex.org/W4385430474","https://openalex.org/W4386072455","https://openalex.org/W4399213366","https://openalex.org/W4401415522","https://openalex.org/W4401416214","https://openalex.org/W4401417065","https://openalex.org/W4402354084","https://openalex.org/W4402354126","https://openalex.org/W4402354166","https://openalex.org/W4404446892","https://openalex.org/W4410706720","https://openalex.org/W4413362511","https://openalex.org/W4413917091","https://openalex.org/W4413917236","https://openalex.org/W4413925041","https://openalex.org/W4413925370","https://openalex.org/W4413925997","https://openalex.org/W4413947056","https://openalex.org/W4414079285"],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"learning":[1,20],"of":[2,12,98,193],"dexterous,":[3],"long-horizon":[4],"robotic":[5,18,207],"skills":[6],"has":[7],"been":[8],"a":[9,59,76,91,112,191],"longstanding":[10],"pursuit":[11],"embodied":[13],"AI.":[14],"Recent":[15],"advances":[16],"in":[17,28,36,115,126],"reinforcement":[19],"(RL)":[21],"have":[22],"demonstrated":[23],"remarkable":[24],"performance":[25],"and":[26,49,61,67,101,137,145,164,180,195],"robustness":[27],"real-world":[29,99,124,138,172,206],"visuomotor":[30,113],"control":[31],"tasks.":[32,168],"However,":[33],"applying":[34],"RL":[35,100,125,173],"the":[37,71,96,116,151],"real":[38],"world":[39],"faces":[40],"challenges":[41],"such":[42],"as":[43,190],"low":[44],"sample":[45,178],"efficiency,":[46],"slow":[47],"exploration,":[48],"significant":[50],"reliance":[51],"on":[52,86,199],"human":[53],"intervention.":[54],"In":[55],"contrast,":[56],"simulators":[57],"offer":[58],"safe":[60],"efficient":[62],"environment":[63],"for":[64,154],"extensive":[65,134],"exploration":[66],"data":[68],"collection,":[69],"while":[70],"visual":[72],"sim-to-real":[73],"gap,":[74],"often":[75],"limiting":[77],"factor,":[78],"can":[79],"be":[80],"mitigated":[81],"using":[82,133],"real-to-sim":[83],"techniques.":[84],"Building":[85],"these,":[87],"we":[88,109],"propose":[89],"SimLauncher,":[90],"novel":[92],"framework":[93],"that":[94],"combines":[95],"strengths":[97],"real-to-sim-to-real":[102],"approaches":[103],"to":[104,170,204],"overcome":[105],"these":[106],"challenges.":[107],"Specifically,":[108],"first":[110],"pre-train":[111],"policy":[114,143,153],"digital":[117],"twin":[118],"simulation":[119,202],"environment,":[120],"which":[121],"then":[122],"benefits":[123],"two":[127],"ways:":[128],"(1)":[129],"bootstrapping":[130],"target":[131],"values":[132],"simulated":[135],"demonstrations":[136,139],"derived":[140],"from":[141,150],"pre-trained":[142,152],"rollouts,":[144],"(2)":[146],"Incorporating":[147],"action":[148],"proposals":[149],"better":[155],"exploration.":[156],"We":[157,185],"conduct":[158],"comprehensive":[159],"experiments":[160],"across":[161],"multi-stage,":[162],"contact-rich,":[163],"dexterous":[165],"hand":[166],"manipulation":[167],"Compared":[169],"prior":[171],"approaches,":[174],"SimLauncher":[175],"significantly":[176],"improves":[177],"efficiency":[179],"achieves":[181],"near-perfect":[182],"success":[183],"rates.":[184],"hope":[186],"this":[187],"work":[188],"serves":[189],"proof":[192],"concept":[194],"inspires":[196],"further":[197],"research":[198],"leveraging":[200],"large-scale":[201],"pre-training":[203],"benefit":[205],"RL.":[208]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-11-28T00:00:00"}
