{"id":"https://openalex.org/W3206827162","doi":"https://doi.org/10.1109/icra46639.2022.9812312","title":"Offline Meta-Reinforcement Learning for Industrial Insertion","display_name":"Offline Meta-Reinforcement Learning for Industrial Insertion","publication_year":2022,"publication_date":"2022-05-23","ids":{"openalex":"https://openalex.org/W3206827162","doi":"https://doi.org/10.1109/icra46639.2022.9812312","mag":"3206827162"},"language":"en","primary_location":{"id":"doi:10.1109/icra46639.2022.9812312","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9812312","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041306134","display_name":"Tony Z. Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tony Z. Zhao","raw_affiliation_strings":["Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA","Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA","institution_ids":[]},{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037464692","display_name":"Jianlan Luo","orcid":"https://orcid.org/0009-0008-8029-7794"},"institutions":[{"id":"https://openalex.org/I4210105824","display_name":"Intrinsic LifeSciences (United States)","ror":"https://ror.org/01maah330","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianlan Luo","raw_affiliation_strings":["Intrinsic Innovation LLC,Mountain View,CA,USA","Intrinsic Innovation LLC, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intrinsic Innovation LLC,Mountain View,CA,USA","institution_ids":["https://openalex.org/I4210105824"]},{"raw_affiliation_string":"Intrinsic Innovation LLC, Mountain View, CA, USA","institution_ids":["https://openalex.org/I4210105824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061512729","display_name":"Oleg Sushkov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oleg Sushkov","raw_affiliation_strings":["Deepmind,London,UK","Deepmind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Deepmind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063215180","display_name":"Rugile Pevceviciute","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rugile Pevceviciute","raw_affiliation_strings":["Deepmind,London,UK","Deepmind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Deepmind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062951341","display_name":"Nicolas Heess","orcid":"https://orcid.org/0000-0001-7876-9256"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nicolas Heess","raw_affiliation_strings":["Deepmind,London,UK","Deepmind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Deepmind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014848474","display_name":"Jon Scholz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jon Scholz","raw_affiliation_strings":["Deepmind,London,UK","Deepmind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Deepmind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"Deepmind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029642293","display_name":"Stefan Schaal","orcid":"https://orcid.org/0000-0001-5660-1874"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stefan Schaal","raw_affiliation_strings":["Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA","Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory,Mountain View,CA,USA","institution_ids":[]},{"raw_affiliation_string":"Work done as an intern at X, The Moonshot Factory, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Google Brain,Mountain View,CA,USA","Department of Electric Engineering and Computer Science, University of California, Berkeley, Berkeley, CA, USA","Google Brain, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain,Mountain View,CA,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Department of Electric Engineering and Computer Science, University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Google Brain, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":22.0976,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.99808815,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"6386","last_page":"6393"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9779999852180481,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8664258718490601},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8518601655960083},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.7905271053314209},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6697176098823547},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6343201398849487},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5760455131530762},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5546056628227234},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5447701811790466},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.49993395805358887},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3377506732940674}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8664258718490601},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518601655960083},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.7905271053314209},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6697176098823547},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6343201398849487},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5760455131530762},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5546056628227234},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5447701811790466},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.49993395805358887},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3377506732940674},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra46639.2022.9812312","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9812312","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5299999713897705}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1938885652","https://openalex.org/W2008731016","https://openalex.org/W2034052310","https://openalex.org/W2064557786","https://openalex.org/W2110697446","https://openalex.org/W2111067802","https://openalex.org/W2115617434","https://openalex.org/W2126909264","https://openalex.org/W2155007355","https://openalex.org/W2539574638","https://openalex.org/W2553722312","https://openalex.org/W2578206533","https://openalex.org/W2604763608","https://openalex.org/W2741122588","https://openalex.org/W2755546070","https://openalex.org/W2781726626","https://openalex.org/W2788904251","https://openalex.org/W2794757725","https://openalex.org/W2883403478","https://openalex.org/W2923504512","https://openalex.org/W2952526277","https://openalex.org/W2962732055","https://openalex.org/W2962732398","https://openalex.org/W2963165111","https://openalex.org/W2963176272","https://openalex.org/W2963403593","https://openalex.org/W2963411833","https://openalex.org/W2963864421","https://openalex.org/W2963940579","https://openalex.org/W2964093801","https://openalex.org/W2964161785","https://openalex.org/W2964333597","https://openalex.org/W2967355195","https://openalex.org/W2967727187","https://openalex.org/W2968268581","https://openalex.org/W3012148463","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3034786558","https://openalex.org/W3047193571","https://openalex.org/W3090369311","https://openalex.org/W3097907450","https://openalex.org/W3125760305","https://openalex.org/W3130717831","https://openalex.org/W3130984490","https://openalex.org/W3159735414","https://openalex.org/W3169929896","https://openalex.org/W3172360140","https://openalex.org/W3174364619","https://openalex.org/W3178748050","https://openalex.org/W3210319071","https://openalex.org/W4287082344","https://openalex.org/W4287689437","https://openalex.org/W4287692003","https://openalex.org/W4287756699","https://openalex.org/W6666610528","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6727807385","https://openalex.org/W6736057607","https://openalex.org/W6742461812","https://openalex.org/W6744181227","https://openalex.org/W6748600884","https://openalex.org/W6760698134","https://openalex.org/W6781236509","https://openalex.org/W6782022165","https://openalex.org/W6783339454","https://openalex.org/W6783962090","https://openalex.org/W6796589144"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W3130669838","https://openalex.org/W2785397462","https://openalex.org/W4294873804","https://openalex.org/W4383109125","https://openalex.org/W2891227010"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,259],"(RL)":[2],"can":[3,86,116,155],"in":[4,79,125,177,206],"principle":[5],"let":[6],"robots":[7],"automatically":[8],"adapt":[9,48,236],"to":[10,23,33,45,47,146,157,159,203,234,237],"new":[11,34,160,199],"tasks,":[12,142,243],"but":[13],"current":[14],"RL":[15,129],"methods":[16,154],"require":[17,108],"a":[18,50,174,238,245,252],"large":[19,63],"number":[20,64],"of":[21,39,65,139,240,248,254],"trials":[22,67,136],"accomplish":[24],"this.":[25],"In":[26,93],"this":[27,94,115,188],"paper,":[28],"we":[29,96],"tackle":[30],"rapid":[31],"adaptation":[32,58,75],"tasks":[35,44,161,261],"through":[36,225],"the":[37,140,144,198,207,211,255,260],"framework":[38],"meta-learning,":[40],"which":[41,172],"utilizes":[42],"past":[43],"learn":[46],"with":[49,119,193,244],"specific":[51,99],"focus":[52],"on":[53],"industrial":[54,178],"insertion":[55,84,242],"tasks.":[56],"Fast":[57],"is":[59,76,201,219,232],"crucial":[60],"because":[61],"prohibitively":[62],"on-robot":[66],"will":[68],"potentially":[69],"damage":[70],"hardware":[71],"pieces.":[72],"Additionally,":[73],"effective":[74],"also":[77],"feasible":[78],"that":[80,114,131,162,229],"experience":[81],"among":[82],"different":[83,165,241],"applications":[85],"be":[87,117],"largely":[88],"leveraged":[89],"by":[90,189],"each":[91,138],"other.":[92],"setting,":[95],"address":[97,187],"two":[98],"challenges":[100],"when":[101],"applying":[102],"meta-learning.":[103],"First,":[104],"conventional":[105],"meta-RL":[106,149,153],"algorithms":[107],"lengthy":[109],"online":[110,195],"meta-training.":[111],"We":[112,186,227],"show":[113,228],"replaced":[118],"appropriately":[120],"chosen":[121],"offline":[122,127],"data,":[123,209],"resulting":[124],"an":[126],"meta-":[128],"method":[130],"only":[132,251],"requires":[133],"demonstrations":[134],"and":[135,216,266],"from":[137,166,262],"prior":[141,208],"without":[143],"need":[145],"run":[147],"costly":[148],"procedures":[150],"online.":[151],"Second,":[152],"fail":[156],"generalize":[158],"are":[163,184,268],"too":[164,220],"those":[167,204],"seen":[168,205],"at":[169,270],"meta-training":[170],"time,":[171],"poses":[173],"particular":[175],"challenge":[176],"applications,":[179],"where":[180],"high":[181],"success":[182,246],"rates":[183],"critical.":[185],"combining":[190],"contextual":[191,212],"meta-learning":[192],"direct":[194],"finetuning:":[196],"if":[197,217],"task":[200],"similar":[202],"then":[210],"meta-learner":[213],"adapts":[214,224],"immediately,":[215],"it":[218,222],"different,":[221],"gradually":[223],"finetuning.":[226],"our":[230],"approach":[231],"able":[233],"quickly":[235],"variety":[239],"rate":[247],"100%":[249],"using":[250],"fraction":[253],"samples":[256],"needed":[257],"for":[258],"scratch.":[263],"Experiment":[264],"videos":[265],"details":[267],"available":[269],"//sites.google.com/view/offline-metarl-insertion.https:":[271]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":27},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
