{"id":"https://openalex.org/W3008535267","doi":"https://doi.org/10.1109/lra.2020.3015448","title":"\u201cGood Robot!\u201d: Efficient Reinforcement Learning for Multi-Step Visual Tasks with Sim to Real Transfer","display_name":"\u201cGood Robot!\u201d: Efficient Reinforcement Learning for Multi-Step Visual Tasks with Sim to Real Transfer","publication_year":2020,"publication_date":"2020-08-11","ids":{"openalex":"https://openalex.org/W3008535267","doi":"https://doi.org/10.1109/lra.2020.3015448","mag":"3008535267"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2020.3015448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2020.3015448","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.11730","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Andrew Hundt","orcid":"https://orcid.org/0000-0003-2023-1810"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Hundt","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":"https://orcid.org/0000-0003-2023-1810","affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Benjamin Killeen","orcid":"https://orcid.org/0000-0003-2511-7929"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin Killeen","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":"https://orcid.org/0000-0003-2511-7929","affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nicholas Greene","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas Greene","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hongtao Wu","orcid":"https://orcid.org/0000-0002-6442-8159"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongtao Wu","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":"https://orcid.org/0000-0002-6442-8159","affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Heeyeon Kwon","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heeyeon Kwon","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chris Paxton","orcid":"https://orcid.org/0000-0003-1009-5982"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chris Paxton","raw_affiliation_strings":["NVIDIA, Seattle, USA"],"raw_orcid":"https://orcid.org/0000-0003-1009-5982","affiliations":[{"raw_affiliation_string":"NVIDIA, Seattle, USA","institution_ids":["https://openalex.org/I4210127875","https://openalex.org/I58610484"]}]},{"author_position":"last","author":{"id":null,"display_name":"Gregory D. Hager","orcid":"https://orcid.org/0000-0002-6662-9763"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory D. Hager","raw_affiliation_strings":["The Johns Hopkins University, Baltimore, USA"],"raw_orcid":"https://orcid.org/0000-0002-6662-9763","affiliations":[{"raw_affiliation_string":"The Johns Hopkins University, Baltimore, USA","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.4685,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.9540861,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"5","issue":"4","first_page":"6724","last_page":"6731"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.39399999380111694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.39399999380111694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.13940000534057617,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.10729999840259552,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7675999999046326},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6513000130653381},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.6010000109672546},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5855000019073486},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5006999969482422},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.47929999232292175},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.477400004863739},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.43299999833106995},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.40139999985694885}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7675999999046326},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.72079998254776},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6513000130653381},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.6010000109672546},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5855000019073486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5382999777793884},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5006999969482422},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.47929999232292175},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.477400004863739},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.43299999833106995},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4047999978065491},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.40139999985694885},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C134697681","wikidata":"https://www.wikidata.org/wiki/Q1609677","display_name":"Clearing","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.298799991607666},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29589998722076416},{"id":"https://openalex.org/C104140500","wikidata":"https://www.wikidata.org/wiki/Q2088159","display_name":"Row and column spaces","level":3,"score":0.27309998869895935},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C10431821","wikidata":"https://www.wikidata.org/wiki/Q6510174","display_name":"Learning effect","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2020.3015448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2020.3015448","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1909.11730","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.11730","pdf_url":"https://arxiv.org/pdf/1909.11730","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.11730","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.11730","pdf_url":"https://arxiv.org/pdf/1909.11730","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2712871118","display_name":null,"funder_award_id":"N00014-17-1-2124","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1892339738","https://openalex.org/W1999156278","https://openalex.org/W2161168419","https://openalex.org/W2600030077","https://openalex.org/W2605102758","https://openalex.org/W2799140957","https://openalex.org/W2910474428","https://openalex.org/W2925173345","https://openalex.org/W2962736495","https://openalex.org/W2962793652","https://openalex.org/W2962899390","https://openalex.org/W2963033241","https://openalex.org/W2963149945","https://openalex.org/W2963170432","https://openalex.org/W2963326767","https://openalex.org/W2963513937","https://openalex.org/W2963569233","https://openalex.org/W2963689319","https://openalex.org/W2963713397","https://openalex.org/W2964055695","https://openalex.org/W2964262254","https://openalex.org/W2986303149","https://openalex.org/W3003651690","https://openalex.org/W6638088447","https://openalex.org/W6682849425","https://openalex.org/W6687681856","https://openalex.org/W6697152211","https://openalex.org/W6712884540","https://openalex.org/W6718836005","https://openalex.org/W6730038592","https://openalex.org/W6740801417","https://openalex.org/W6740972095","https://openalex.org/W6741036684","https://openalex.org/W6744838376","https://openalex.org/W6746713385","https://openalex.org/W6746721349","https://openalex.org/W6751955673","https://openalex.org/W6752818329","https://openalex.org/W6753243525","https://openalex.org/W6764969207","https://openalex.org/W6771128445"],"related_works":[],"abstract_inverted_index":{"Current":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"algorithms":[4],"struggle":[5],"with":[6,51,103,145,155,168,184,201],"long-horizon":[7],"tasks":[8,195],"where":[9],"time":[10],"can":[11],"be":[12,21],"wasted":[13],"exploring":[14,40],"dead":[15],"ends":[16],"and":[17,42,90,148,199],"task":[18],"progress":[19,48,204],"may":[20],"easily":[22],"reversed.":[23],"We":[24,134],"develop":[25],"the":[26,124,161,165,178],"SPOT":[27,55],"framework,":[28],"which":[29],"explores":[30],"within":[31],"action":[32],"safety":[33],"zones,":[34],"learns":[35],"about":[36],"unsafe":[37],"regions":[38],"without":[39],"them,":[41],"prioritizes":[43],"experiences":[44],"that":[45],"reverse":[46],"earlier":[47],"to":[49,74,82,93,105,131,137,187,191],"learn":[50],"remarkable":[52],"efficiency.":[53],"The":[54],"framework":[56],"successfully":[57],"completes":[58],"simulated":[59],"trials":[60,144,154],"of":[61,64,87,143,153,181,203],"a":[62,67],"variety":[63],"tasks,":[65],"improving":[66],"baseline":[68],"trial":[69,108],"success":[70],"rate":[71],"from":[72,80,91],"13%":[73,81],"100%":[75,142,152],"when":[76,84,95],"stacking":[77],"4":[78,88],"cubes,":[79,89],"99%":[83],"creating":[85],"rows":[86,150],"84%":[92],"95%":[94],"clearing":[96],"toys":[97],"arranged":[98],"in":[99,141,151],"adversarial":[100],"patterns.":[101],"Efficiency":[102],"respect":[104],"actions":[106],"per":[107],"typically":[109],"improves":[110],"by":[111,158],"30%":[112],"or":[113],"more,":[114],"while":[115],"training":[116],"takes":[117],"just":[118],"1-20":[119],"k":[120],"actions,":[121],"depending":[122],"on":[123,164],"task.":[125],"Furthermore,":[126],"we":[127],"demonstrate":[128],"direct":[129],"sim":[130,186],"real":[132,139,149,166,188],"transfer.":[133],"are":[135],"able":[136],"create":[138],"stacks":[140],"61%":[146],"efficiency":[147,157],"59%":[156],"directly":[159],"loading":[160],"simulation-trained":[162],"model":[163],"robot":[167],"no":[169],"additional":[170],"real-world":[171],"fine-tuning.":[172],"To":[173],"our":[174],"knowledge,":[175],"this":[176],"is":[177,207],"first":[179],"instance":[180],"reinforcement":[182],"learning":[183],"successful":[185],"transfer":[189],"applied":[190],"long":[192],"term":[193],"multi-step":[194],"such":[196],"as":[197],"block-stacking":[198],"row-making":[200],"consideration":[202],"reversal.":[205],"Code":[206],"available":[208],"at":[209],"https://github.com/jhu-lcsr/good_robot.":[210]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2020-03-06T00:00:00"}
