{"id":"https://openalex.org/W2976047560","doi":"https://doi.org/10.1109/icra40945.2020.9196540","title":"Meta Reinforcement Learning for Sim-to-real Domain Adaptation","display_name":"Meta Reinforcement Learning for Sim-to-real Domain Adaptation","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W2976047560","doi":"https://doi.org/10.1109/icra40945.2020.9196540","mag":"2976047560"},"language":"en","primary_location":{"id":"doi:10.1109/icra40945.2020.9196540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9196540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.12906","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003059336","display_name":"Karol Arndt","orcid":"https://orcid.org/0000-0001-9596-6585"},"institutions":[{"id":"https://openalex.org/I4210134318","display_name":"University of Technology","ror":"https://ror.org/03gbw6p94","country_code":"RU","type":"education","lineage":["https://openalex.org/I4210134318"]},{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI","RU"],"is_corresponding":true,"raw_author_name":"Karol Arndt","raw_affiliation_strings":["Aalto University, Espoo, Finland","(Helsinki University of Technology)"],"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"(Helsinki University of Technology)","institution_ids":["https://openalex.org/I4210134318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047209845","display_name":"Murtaza Hazara","orcid":"https://orcid.org/0000-0003-0477-3767"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Murtaza Hazara","raw_affiliation_strings":["Aalto University, Espoo, Finland","Aalto University; Espoo Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Aalto University; Espoo Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038342432","display_name":"Ali Ghadirzadeh","orcid":null},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Ali Ghadirzadeh","raw_affiliation_strings":["Aalto University, Espoo, Finland","Aalto University; Espoo Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Aalto University; Espoo Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080940147","display_name":"Ville Kyrki","orcid":"https://orcid.org/0000-0002-5230-5549"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Ville Kyrki","raw_affiliation_strings":["Aalto University, Espoo, Finland","Aalto University; Espoo Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Aalto University; Espoo Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5003059336"],"corresponding_institution_ids":["https://openalex.org/I4210134318","https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":2.49661232,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.90110114,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2725","last_page":"2731"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9120560884475708},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7442455291748047},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7121926546096802},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7107279300689697},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6554165482521057},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6209825873374939},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.5177437663078308},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5028064250946045},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49659568071365356},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4951290190219879},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.47660893201828003},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.47465780377388},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.4560111165046692},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4280101954936981},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4116189181804657},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3637532591819763},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.342800110578537},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1411186158657074}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9120560884475708},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7442455291748047},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7121926546096802},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7107279300689697},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6554165482521057},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6209825873374939},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.5177437663078308},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5028064250946045},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49659568071365356},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4951290190219879},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.47660893201828003},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.47465780377388},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.4560111165046692},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4280101954936981},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4116189181804657},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3637532591819763},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.342800110578537},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1411186158657074},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra40945.2020.9196540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9196540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.12906","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.12906","pdf_url":"https://arxiv.org/pdf/1909.12906","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2976047560","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1909.12906","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1909.12906","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.12906","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.12906","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.12906","pdf_url":"https://arxiv.org/pdf/1909.12906","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2976047560.pdf","grobid_xml":"https://content.openalex.org/works/W2976047560.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W99485931","https://openalex.org/W1757796397","https://openalex.org/W2122053572","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2592538810","https://openalex.org/W2604763608","https://openalex.org/W2605102758","https://openalex.org/W2736601468","https://openalex.org/W2753160622","https://openalex.org/W2775954438","https://openalex.org/W2785397462","https://openalex.org/W2798273187","https://openalex.org/W2892230114","https://openalex.org/W2895899670","https://openalex.org/W2918049070","https://openalex.org/W2928127548","https://openalex.org/W2962736495","https://openalex.org/W2963168530","https://openalex.org/W2963184939","https://openalex.org/W2963629498","https://openalex.org/W2964161785","https://openalex.org/W2964310273","https://openalex.org/W2968116426","https://openalex.org/W2990747716","https://openalex.org/W3003785634","https://openalex.org/W3004116079","https://openalex.org/W3101442004","https://openalex.org/W6637967152","https://openalex.org/W6682849425","https://openalex.org/W6697071109","https://openalex.org/W6731982132","https://openalex.org/W6736057607","https://openalex.org/W6741002519","https://openalex.org/W6742103744","https://openalex.org/W6743661861","https://openalex.org/W6746914971","https://openalex.org/W6747625265","https://openalex.org/W6749733343","https://openalex.org/W6754471908","https://openalex.org/W6755365793","https://openalex.org/W6755437240","https://openalex.org/W6755561334","https://openalex.org/W6756463683","https://openalex.org/W6760698134","https://openalex.org/W6770858630","https://openalex.org/W6780559895","https://openalex.org/W6966558720"],"related_works":["https://openalex.org/W2736601468","https://openalex.org/W2158782408","https://openalex.org/W2976228896","https://openalex.org/W3206393052","https://openalex.org/W3008492644","https://openalex.org/W3109396871","https://openalex.org/W3101442004","https://openalex.org/W2996037775","https://openalex.org/W2990747716","https://openalex.org/W2981030070","https://openalex.org/W2964043796","https://openalex.org/W2963614114","https://openalex.org/W2951775809","https://openalex.org/W2808844346","https://openalex.org/W1771410628","https://openalex.org/W3205260198","https://openalex.org/W3152039673","https://openalex.org/W3120102699","https://openalex.org/W3131309910","https://openalex.org/W2806332488"],"abstract_inverted_index":{"Modern":[0],"reinforcement":[1],"learning":[2,39],"methods":[3],"suffer":[4],"from":[5],"low":[6],"sample":[7],"efficiency":[8],"and":[9,53,79,100,120],"unsafe":[10],"exploration,":[11],"making":[12],"it":[13],"infeasible":[14],"to":[15,28,40,47,60,112],"train":[16,41],"robotic":[17],"policies":[18],"entirely":[19],"on":[20,94,104],"real":[21],"hardware.":[22],"In":[23],"this":[24,92],"work,":[25],"we":[26],"propose":[27],"address":[29],"the":[30,71,81,84,125],"problem":[31],"of":[32,50,83,107],"sim-to-real":[33],"domain":[34,75,122],"transfer":[35],"by":[36,73],"using":[37,54],"meta":[38],"a":[42,48,55,95,105,109,113],"policy":[43,93],"that":[44,65],"can":[45],"adapt":[46],"variety":[49],"dynamic":[51],"conditions,":[52],"task-specific":[56],"trajectory":[57],"generation":[58],"model":[59],"provide":[61],"an":[62],"action":[63],"space":[64,86],"facilitates":[66],"quick":[67],"exploration.":[68],"We":[69,89],"evaluate":[70,101],"method":[72,116],"performing":[74],"adaptation":[76,123],"in":[77,128],"simulation":[78],"analyzing":[80],"structure":[82],"latent":[85],"during":[87],"adaptation.":[88],"then":[90],"deploy":[91],"KUKA":[96],"LBR":[97],"4+":[98],"robot":[99],"its":[102],"performance":[103],"task":[106],"hitting":[108],"hockey":[110],"puck":[111],"target.":[114],"Our":[115],"shows":[117],"more":[118],"consistent":[119],"stable":[121],"than":[124],"baseline,":[126],"resulting":[127],"better":[129],"overall":[130],"performance.":[131]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":11}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
