{"id":"https://openalex.org/W4316021894","doi":"https://doi.org/10.1109/lra.2023.3236882","title":"Learning From Guided Play: Improving Exploration for Adversarial Imitation Learning With Simple Auxiliary Tasks","display_name":"Learning From Guided Play: Improving Exploration for Adversarial Imitation Learning With Simple Auxiliary Tasks","publication_year":2023,"publication_date":"2023-01-13","ids":{"openalex":"https://openalex.org/W4316021894","doi":"https://doi.org/10.1109/lra.2023.3236882"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2023.3236882","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3236882","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036575954","display_name":"Trevor Ablett","orcid":"https://orcid.org/0000-0001-5904-8402"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Trevor Ablett","raw_affiliation_strings":["Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0001-5904-8402","affiliations":[{"raw_affiliation_string":"Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053699492","display_name":"Bryan Chan","orcid":"https://orcid.org/0000-0002-6278-9575"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Bryan Chan","raw_affiliation_strings":["Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":"https://orcid.org/0000-0002-6278-9575","affiliations":[{"raw_affiliation_string":"Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001210516","display_name":"Jonathan Kelly","orcid":"https://orcid.org/0000-0002-5528-6136"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jonathan Kelly","raw_affiliation_strings":["Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0002-5528-6136","affiliations":[{"raw_affiliation_string":"Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1421,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.81647744,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"8","issue":"3","first_page":"1263","last_page":"1270"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.8027842044830322},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7278534173965454},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6745935082435608},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6444478034973145},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6119005084037781},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5463590621948242},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5323346853256226},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.44048643112182617},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10362514853477478}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.8027842044830322},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7278534173965454},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6745935082435608},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6444478034973145},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6119005084037781},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5463590621948242},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5323346853256226},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.44048643112182617},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10362514853477478},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2023.3236882","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2023.3236882","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.75}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320994","display_name":"Canada Research Chairs","ror":"https://ror.org/0517h6h17"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W142858861","https://openalex.org/W1999874108","https://openalex.org/W2109910161","https://openalex.org/W2741122588","https://openalex.org/W2810785043","https://openalex.org/W2962845991","https://openalex.org/W2962894046","https://openalex.org/W2963099939","https://openalex.org/W3018036994","https://openalex.org/W3039179219","https://openalex.org/W3129322645","https://openalex.org/W3157951743","https://openalex.org/W3168892396","https://openalex.org/W3206200647","https://openalex.org/W4214717370","https://openalex.org/W6605846256","https://openalex.org/W6640174482","https://openalex.org/W6717230150","https://openalex.org/W6718092244","https://openalex.org/W6738261575","https://openalex.org/W6740888197","https://openalex.org/W6742461812","https://openalex.org/W6747473740","https://openalex.org/W6748523217","https://openalex.org/W6752338937","https://openalex.org/W6753243525","https://openalex.org/W6754484421","https://openalex.org/W6754848371","https://openalex.org/W6760439459","https://openalex.org/W6767765287","https://openalex.org/W6768078163","https://openalex.org/W6769035977","https://openalex.org/W6780440201","https://openalex.org/W6795344713","https://openalex.org/W6796441166","https://openalex.org/W6797377937","https://openalex.org/W6799150178","https://openalex.org/W6810683139"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2768698792"],"abstract_inverted_index":{"Adversarial":[0],"imitation":[1,11],"learning":[2,12,31,65],"(AIL)":[3],"has":[4],"become":[5],"a":[6,48,53,84,98,114,156,192,200],"popular":[7],"alternative":[8],"to":[9,43,113,126,136],"supervised":[10],"that":[13,38,131,163,195],"reduces":[14],"the":[15,20,39,60,66,78,124,144,197,209,212],"distribution":[16,62],"shift":[17],"suffered":[18],"by":[19],"latter.":[21],"However,":[22],"AIL":[23,57,133,168,216],"requires":[24],"effective":[25],"exploration":[26,44],"during":[27],"an":[28,81],"online":[29],"reinforcement":[30],"phase.":[32],"In":[33],"this":[34,139,184],"work,":[35],"we":[36,102,187],"show":[37],"standard,":[40],"na\u00efve":[41],"approach":[42],"can":[45,70],"manifest":[46],"as":[47],"suboptimal":[49],"local":[50,201],"maximum":[51,202],"if":[52],"policy":[54],"learned":[55,213],"with":[56],"sufficiently":[58],"matches":[59],"expert":[61,82,104,147,176],"without":[63],"fully":[64],"desired":[67],"task.":[68,116],"This":[69],"be":[71],"particularly":[72],"catastrophic":[73],"for":[74,143],"manipulation":[75,160],"tasks,":[76],"where":[77],"difference":[79],"between":[80,149,199,211],"and":[83,129,169,203,206,217],"non-expert":[85],"state-action":[86],"pair":[87],"is":[88],"often":[89],"subtle.":[90],"We":[91],"present":[92],"Learning":[93],"from":[94,215],"Guided":[95],"Play":[96],"(LfGP),":[97],"framework":[99],"in":[100,111,155],"which":[101],"leverage":[103],"demonstrations":[105],"of":[106,119,146,191],"multiple":[107],"exploratory,":[108],"auxiliary":[109,121],"tasks":[110,122],"addition":[112,118],"main":[115,150],"The":[117],"these":[120,180],"forces":[123],"agent":[125],"explore":[127],"states":[128],"actions":[130],"standard":[132],"may":[134],"learn":[135],"ignore.":[137],"Additionally,":[138],"particular":[140],"formulation":[141],"allows":[142],"reusability":[145],"data":[148],"tasks.":[151],"Our":[152],"experimental":[153],"results":[154],"challenging":[157],"multitask":[158],"robotic":[159],"domain":[161],"indicate":[162],"LfGP":[164],"significantly":[165],"outperforms":[166],"both":[167],"behaviour":[170],"cloning,":[171],"while":[172],"also":[173,207],"being":[174],"more":[175],"sample":[177],"efficient":[178],"than":[179],"baselines.":[181],"To":[182],"explain":[183],"performance":[185],"gap,":[186],"provide":[188],"further":[189],"analysis":[190],"toy":[193],"problem":[194],"highlights":[196],"coupling":[198],"poor":[204],"exploration,":[205],"visualize":[208],"differences":[210],"models":[214],"LfGP.":[218]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
