{"id":"https://openalex.org/W4393159809","doi":"https://doi.org/10.1609/aaai.v38i10.28970","title":"Contextual Pre-planning on Reward Machine Abstractions for Enhanced Transfer in Deep Reinforcement Learning","display_name":"Contextual Pre-planning on Reward Machine Abstractions for Enhanced Transfer in Deep Reinforcement Learning","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393159809","doi":"https://doi.org/10.1609/aaai.v38i10.28970"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v38i10.28970","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i10.28970","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/28970/29844","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/28970/29844","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020333684","display_name":"Guy Azran","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Guy Azran","raw_affiliation_strings":["Technion - Israel Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039506832","display_name":"Mohamad H. Danesh","orcid":null},"institutions":[{"id":"https://openalex.org/I5023651","display_name":"McGill University","ror":"https://ror.org/01pxwe438","country_code":"CA","type":"education","lineage":["https://openalex.org/I5023651"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mohamad H. Danesh","raw_affiliation_strings":["McGill University"],"affiliations":[{"raw_affiliation_string":"McGill University","institution_ids":["https://openalex.org/I5023651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073739308","display_name":"Stefano V. Albrecht","orcid":"https://orcid.org/0000-0002-8735-1465"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stefano V. Albrecht","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047553225","display_name":"Sarah Keren","orcid":"https://orcid.org/0000-0001-7211-753X"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Sarah Keren","raw_affiliation_strings":["Technion - Israel Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology","institution_ids":["https://openalex.org/I174306211"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5020333684"],"corresponding_institution_ids":["https://openalex.org/I174306211"],"apc_list":null,"apc_paid":null,"fwci":0.31,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.4637224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"38","issue":"10","first_page":"10953","last_page":"10961"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9244999885559082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9244999885559082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8342439532279968},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.608640193939209},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5903971195220947},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48688235878944397},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4718461036682129},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.4090856909751892},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40200209617614746},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3256129026412964},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.14610221982002258}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8342439532279968},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.608640193939209},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5903971195220947},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48688235878944397},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4718461036682129},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4090856909751892},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40200209617614746},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3256129026412964},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.14610221982002258}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v38i10.28970","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i10.28970","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/28970/29844","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v38i10.28970","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i10.28970","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/28970/29844","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4393159809.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W195596278","https://openalex.org/W1522301498","https://openalex.org/W1777239053","https://openalex.org/W2076337359","https://openalex.org/W2097381042","https://openalex.org/W2109910161","https://openalex.org/W2145339207","https://openalex.org/W2578206533","https://openalex.org/W2604763608","https://openalex.org/W2766329790","https://openalex.org/W2783109451","https://openalex.org/W2797527950","https://openalex.org/W2804948070","https://openalex.org/W2970673985","https://openalex.org/W2980555752","https://openalex.org/W3021208093","https://openalex.org/W3037476194","https://openalex.org/W3090386093","https://openalex.org/W3092156990","https://openalex.org/W3093096978","https://openalex.org/W3118210634","https://openalex.org/W3119486431","https://openalex.org/W3129170303","https://openalex.org/W3131897549","https://openalex.org/W3171517119","https://openalex.org/W3172592753","https://openalex.org/W3178385351","https://openalex.org/W3197485521","https://openalex.org/W3206976301","https://openalex.org/W3213132814","https://openalex.org/W3213243879","https://openalex.org/W4226340130","https://openalex.org/W4226452279","https://openalex.org/W4285428938","https://openalex.org/W4287018575","https://openalex.org/W4287080810","https://openalex.org/W4287762569","https://openalex.org/W4300427736","https://openalex.org/W4300971732","https://openalex.org/W4306999448","https://openalex.org/W4378508712","https://openalex.org/W4394666657","https://openalex.org/W6631190155","https://openalex.org/W6669402789","https://openalex.org/W6692405165","https://openalex.org/W6746721349","https://openalex.org/W6747905248","https://openalex.org/W6750629867","https://openalex.org/W6780559895","https://openalex.org/W6803178138"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Recent":[0],"studies":[1],"show":[2,108],"that":[3,53,109],"deep":[4],"reinforcement":[5],"learning":[6,30],"(DRL)":[7],"agents":[8,67,93],"tend":[9],"to":[10,12,22,24,33,41,94],"overfit":[11],"the":[13,43,58],"task":[14,45],"on":[15,57],"which":[16],"they":[17],"were":[18],"trained":[19],"and":[20,62,79,101,115],"fail":[21],"adapt":[23],"minor":[25],"environment":[26],"changes.":[27],"To":[28],"expedite":[29],"when":[31],"transferring":[32],"unseen":[34],"tasks,":[35,91],"we":[36],"propose":[37],"a":[38,119],"novel":[39],"approach":[40],"representing":[42],"current":[44,59,76],"using":[46],"reward":[47],"machines":[48],"(RMs),":[49],"state":[50,78],"machine":[51],"abstractions":[52],"induce":[54],"subtasks":[55],"based":[56],"task\u2019s":[60],"rewards":[61,80],"dynamics.":[63],"Our":[64],"method":[65],"provides":[66],"with":[68],"symbolic":[69],"representations":[70,87,111],"of":[71,97,121],"optimal":[72],"transitions":[73],"from":[74],"their":[75],"abstract":[77],"them":[81],"for":[82],"achieving":[83],"these":[84],"transitions.":[85],"These":[86],"are":[88],"shared":[89],"across":[90],"allowing":[92],"exploit":[95],"knowledge":[96],"previously":[98],"encountered":[99],"symbols":[100],"transitions,":[102],"thus":[103],"enhancing":[104],"transfer.":[105],"Empirical":[106],"results":[107],"our":[110],"improve":[112],"sample":[113],"efficiency":[114],"few-shot":[116],"transfer":[117],"in":[118],"variety":[120],"domains.":[122]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-01-25T23:04:38.658462","created_date":"2025-10-10T00:00:00"}
