{"id":"https://openalex.org/W3083905601","doi":"https://doi.org/10.1613/jair.1.12372","title":"Induction and Exploitation of Subgoal Automata for Reinforcement Learning","display_name":"Induction and Exploitation of Subgoal Automata for Reinforcement Learning","publication_year":2021,"publication_date":"2021-03-10","ids":{"openalex":"https://openalex.org/W3083905601","doi":"https://doi.org/10.1613/jair.1.12372","mag":"3083905601"},"language":"en","primary_location":{"id":"doi:10.1613/jair.1.12372","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.12372","pdf_url":"https://jair.org/index.php/jair/article/download/12372/26668","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://jair.org/index.php/jair/article/download/12372/26668","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Daniel Furelos-Blanco","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Daniel Furelos-Blanco","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mark Law","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark Law","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Anders Jonsson","orcid":null},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Anders Jonsson","raw_affiliation_strings":["Universitat Pompeu Fabra"],"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Krysia Broda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krysia Broda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Alessandra Russo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alessandra Russo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":2.0959,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.88922597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"70","issue":null,"first_page":"1031","last_page":"1116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.620199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.620199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.13510000705718994,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1103999987244606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1613/jair.1.12372","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.12372","pdf_url":"https://jair.org/index.php/jair/article/download/12372/26668","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},{"id":"pmh:oai:repositori-api.upf.edu:10230/47236","is_oa":false,"landing_page_url":"http://hdl.handle.net/10230/47236","pdf_url":null,"source":{"id":"https://openalex.org/S4306402615","display_name":"Repositori digital de la UPF (Universitat Pompeu Fabra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I170486558","host_organization_name":"Universitat Pompeu Fabra","host_organization_lineage":["https://openalex.org/I170486558"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2009.03855","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.03855","pdf_url":"https://arxiv.org/pdf/2009.03855","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:spiral.imperial.ac.uk:10044/1/88084","is_oa":false,"landing_page_url":"http://hdl.handle.net/10044/1/88084","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"1116","raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1613/jair.1.12372","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.12372","pdf_url":"https://jair.org/index.php/jair/article/download/12372/26668","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3083905601.pdf","grobid_xml":"https://content.openalex.org/works/W3083905601.grobid-xml"},"referenced_works_count":62,"referenced_works":["https://openalex.org/W92128319","https://openalex.org/W138607541","https://openalex.org/W277443901","https://openalex.org/W561688239","https://openalex.org/W1494114146","https://openalex.org/W1507087299","https://openalex.org/W1528049422","https://openalex.org/W1536990779","https://openalex.org/W1559171143","https://openalex.org/W1640247718","https://openalex.org/W1640774615","https://openalex.org/W1725778618","https://openalex.org/W1777239053","https://openalex.org/W1814441753","https://openalex.org/W1989445634","https://openalex.org/W2019001600","https://openalex.org/W2109910161","https://openalex.org/W2121517924","https://openalex.org/W2143435603","https://openalex.org/W2145339207","https://openalex.org/W2153947321","https://openalex.org/W2155968351","https://openalex.org/W2156493855","https://openalex.org/W2161252410","https://openalex.org/W2161966552","https://openalex.org/W2168359464","https://openalex.org/W2283424513","https://openalex.org/W2498573650","https://openalex.org/W2553882142","https://openalex.org/W2592215206","https://openalex.org/W2734058336","https://openalex.org/W2770406504","https://openalex.org/W2804948070","https://openalex.org/W2888038357","https://openalex.org/W2888241791","https://openalex.org/W2902538158","https://openalex.org/W2902907165","https://openalex.org/W2912788251","https://openalex.org/W2923538364","https://openalex.org/W2955368974","https://openalex.org/W2966537673","https://openalex.org/W2970673985","https://openalex.org/W2972500268","https://openalex.org/W2973687367","https://openalex.org/W2990313855","https://openalex.org/W6631190155","https://openalex.org/W6633381923","https://openalex.org/W6636906961","https://openalex.org/W6637298476","https://openalex.org/W6642630624","https://openalex.org/W6654623329","https://openalex.org/W6672933780","https://openalex.org/W6677916085","https://openalex.org/W6680904998","https://openalex.org/W6683443546","https://openalex.org/W6692595500","https://openalex.org/W6760676710","https://openalex.org/W6765753263","https://openalex.org/W6771603063","https://openalex.org/W6771676130","https://openalex.org/W6775686901","https://openalex.org/W7075606714"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3,209],"present":[4],"ISA,":[5],"an":[6,30,177,227],"approach":[7],"for":[8],"learning":[9,16,22,184],"and":[10,69,141,161,194,241],"exploiting":[11],"subgoals":[12,39],"in":[13,158,186],"episodic":[14],"reinforcement":[15,21],"(RL)":[17],"tasks.":[18],"ISA":[19,157],"interleaves":[20],"with":[23,135],"the":[24,37,63,67,74,95,102,106,116,131,136,149,172,182,189,191,199,212,224,232],"induction":[25,132],"of":[26,48,56,66,97,133,139,181,188,206],"a":[27,46,60,70,90,114,120,143],"subgoal":[28,52,91],"automaton,":[29],"automaton":[31,53,92,109,117,122,173,183],"whose":[32],"edges":[33],"are":[34,236],"labeled":[35],"by":[36,101],"task\u2019s":[38],"expressed":[40],"as":[41],"propositional":[42],"logic":[43,83],"formulas":[44],"over":[45],"set":[47],"high-level":[49,98],"events.":[50],"A":[51,80],"also":[54],"consists":[55],"two":[57],"special":[58],"states:":[59],"state":[61,71,163],"indicating":[62,72],"successful":[64],"completion":[65],"task,":[68],"that":[73,93,123,125,170,211,222],"task":[75],"has":[76],"finished":[77],"without":[78],"succeeding.":[79],"state-of-the-art":[81],"inductive":[82],"programming":[84],"system":[85],"is":[86],"used":[87],"to":[88,147,219,231],"learn":[89,220],"covers":[94,124],"traces":[96],"events":[99],"observed":[100],"RL":[103,168,207],"agent.":[104],"When":[105],"currently":[107],"exploited":[108,218],"does":[110],"not":[111,237],"correctly":[112],"recognize":[113],"trace,":[115],"learner":[118],"induces":[119],"new":[121],"trace.":[126],"The":[127],"interleaving":[128],"process":[129],"guarantees":[130],"automata":[134,214,235],"minimum":[137],"number":[138],"states,":[140],"applies":[142],"symmetry":[144,192],"breaking":[145,193],"mechanism":[146],"shrink":[148],"search":[150],"space":[151,164],"whilst":[152],"remaining":[153],"complete.":[154],"We":[155,175],"evaluate":[156],"several":[159],"gridworld":[160],"continuous":[162],"problems":[165],"using":[166],"different":[167],"algorithms":[169],"leverage":[171],"structures.":[174],"provide":[176],"in-depth":[178],"empirical":[179],"analysis":[180],"performance":[185],"terms":[187],"traces,":[190],"specific":[195],"restrictions":[196],"imposed":[197],"on":[198],"final":[200],"learnable":[201],"automaton.":[202],"For":[203],"each":[204],"class":[205],"problem,":[208],"show":[210],"learned":[213,238],"can":[215],"be":[216],"successfully":[217],"policies":[221],"reach":[223],"goal,":[225],"achieving":[226],"average":[228],"reward":[229],"comparable":[230],"case":[233],"where":[234],"but":[239],"handcrafted":[240],"given":[242],"beforehand.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-03T08:47:05.690250","created_date":"2020-09-14T00:00:00"}
