{"id":"https://openalex.org/W4386484398","doi":"https://doi.org/10.1016/j.artint.2023.104001","title":"Reward-respecting subtasks for model-based reinforcement learning","display_name":"Reward-respecting subtasks for model-based reinforcement learning","publication_year":2023,"publication_date":"2023-09-06","ids":{"openalex":"https://openalex.org/W4386484398","doi":"https://doi.org/10.1016/j.artint.2023.104001"},"language":"en","primary_location":{"id":"doi:10.1016/j.artint.2023.104001","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.artint.2023.104001","pdf_url":null,"source":{"id":"https://openalex.org/S196139623","display_name":"Artificial Intelligence","issn_l":"0004-3702","issn":["0004-3702","1872-7921"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.artint.2023.104001","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004923102","display_name":"Richard S. Sutton","orcid":"https://orcid.org/0000-0002-3679-3415"},"institutions":[{"id":"https://openalex.org/I109736498","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95","country_code":"CA","type":"facility","lineage":["https://openalex.org/I109736498"]},{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Richard S. Sutton","raw_affiliation_strings":["Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","Canada CIFAR AI Chair, Canada","DeepMind, Edmonton, Alberta, Canada","University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Canada CIFAR AI Chair, Canada","institution_ids":["https://openalex.org/I109736498"]},{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085413987","display_name":"Marlos C. Machado","orcid":"https://orcid.org/0000-0002-0117-6134"},"institutions":[{"id":"https://openalex.org/I109736498","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95","country_code":"CA","type":"facility","lineage":["https://openalex.org/I109736498"]},{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Marlos C. Machado","raw_affiliation_strings":["Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","Canada CIFAR AI Chair, Canada","DeepMind, Edmonton, Alberta, Canada","University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":"https://orcid.org/0000-0002-0117-6134","affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Canada CIFAR AI Chair, Canada","institution_ids":["https://openalex.org/I109736498"]},{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014027207","display_name":"Gerhard Holland","orcid":"https://orcid.org/0000-0002-2317-3460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G. Zacharias Holland","raw_affiliation_strings":["DeepMind, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040427757","display_name":"David Szepesv\u00e1ri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Szepesvari","raw_affiliation_strings":["DeepMind, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090501025","display_name":"Finbarr Timbers","orcid":"https://orcid.org/0000-0001-9047-9542"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Finbarr Timbers","raw_affiliation_strings":["DeepMind, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075806019","display_name":"B. K. Tanner","orcid":"https://orcid.org/0000-0002-1474-177X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brian Tanner","raw_affiliation_strings":["DeepMind, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108583544","display_name":"Adam White","orcid":null},"institutions":[{"id":"https://openalex.org/I109736498","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95","country_code":"CA","type":"facility","lineage":["https://openalex.org/I109736498"]},{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Adam White","raw_affiliation_strings":["Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","Canada CIFAR AI Chair, Canada","DeepMind, Edmonton, Alberta, Canada","University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alberta Machine Intelligence Institute (Amii), Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"Canada CIFAR AI Chair, Canada","institution_ids":["https://openalex.org/I109736498"]},{"raw_affiliation_string":"DeepMind, Edmonton, Alberta, Canada","institution_ids":[]},{"raw_affiliation_string":"University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5085413987"],"corresponding_institution_ids":["https://openalex.org/I109736498","https://openalex.org/I154425047"],"apc_list":{"value":3670,"currency":"USD","value_usd":3670},"apc_paid":{"value":3670,"currency":"USD","value_usd":3670},"fwci":2.7265,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.92160418,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"324","issue":null,"first_page":"104001","last_page":"104001"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8906728625297546},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7701268196105957},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6973434686660767},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.6275498270988464},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.5806232690811157},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5692152976989746},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4296651780605316},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4148935377597809},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3965945839881897},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08779820799827576},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.07004201412200928}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8906728625297546},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7701268196105957},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6973434686660767},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.6275498270988464},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.5806232690811157},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5692152976989746},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4296651780605316},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4148935377597809},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3965945839881897},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08779820799827576},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.07004201412200928},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.artint.2023.104001","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.artint.2023.104001","pdf_url":null,"source":{"id":"https://openalex.org/S196139623","display_name":"Artificial Intelligence","issn_l":"0004-3702","issn":["0004-3702","1872-7921"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.artint.2023.104001","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.artint.2023.104001","pdf_url":null,"source":{"id":"https://openalex.org/S196139623","display_name":"Artificial Intelligence","issn_l":"0004-3702","issn":["0004-3702","1872-7921"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1569296262","https://openalex.org/W1981627423","https://openalex.org/W1984735703","https://openalex.org/W2004303440","https://openalex.org/W2009303086","https://openalex.org/W2055486654","https://openalex.org/W2064675550","https://openalex.org/W2085679424","https://openalex.org/W2109910161","https://openalex.org/W2112796928","https://openalex.org/W2145339207","https://openalex.org/W2149960632","https://openalex.org/W2155027007","https://openalex.org/W2158282517","https://openalex.org/W2159752377","https://openalex.org/W2596585349","https://openalex.org/W2602753196","https://openalex.org/W2612972698","https://openalex.org/W2787066086","https://openalex.org/W2913756371","https://openalex.org/W3041202696","https://openalex.org/W4294885165","https://openalex.org/W4306809390","https://openalex.org/W6635628852","https://openalex.org/W6642630624","https://openalex.org/W6672933780","https://openalex.org/W6679365036","https://openalex.org/W6679483443","https://openalex.org/W6680292960","https://openalex.org/W6680657880","https://openalex.org/W6680976742","https://openalex.org/W6683084483","https://openalex.org/W6683204974","https://openalex.org/W6727349600","https://openalex.org/W6729556111","https://openalex.org/W6734325300","https://openalex.org/W6744597842","https://openalex.org/W6745277162","https://openalex.org/W6748603076","https://openalex.org/W6758390756","https://openalex.org/W6760908582","https://openalex.org/W6802826383"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W273478400"],"abstract_inverted_index":{"To":[0],"achieve":[1],"the":[2,17,47,56,64,77,101,124,132,139,142,145,154,165,168,170,206,213,225],"ambitious":[3],"goals":[4],"of":[5,16,58,104,123,164,215,227],"artificial":[6],"intelligence,":[7],"reinforcement":[8],"learning":[9,27],"must":[10],"include":[11],"planning":[12,133,191],"with":[13,31],"a":[14,96,105,121,158,162,221],"model":[15,122],"world":[18],"that":[19,55,152,175],"is":[20,54,61,113,126],"abstract":[21],"in":[22,83,190],"state":[23,32,98,166],"and":[24,63,119,128,217,237,244,249],"time.":[25],"Deep":[26],"has":[28,37],"made":[29,129],"progress":[30],"abstraction,":[33],"but":[34],"temporal":[35],"abstraction":[36],"rarely":[38],"been":[39],"used,":[40],"despite":[41],"extensively":[42],"developed":[43],"theory":[44],"based":[45,160,197],"on":[46,144,161,198],"options":[48,60,196,203,216],"framework.":[49],"One":[50],"reason":[51],"for":[52,68],"this":[53],"space":[57,214],"possible":[59],"immense,":[62],"methods":[65],"previously":[66],"proposed":[67],"option":[69,78,125,171,176,228],"discovery":[70],"do":[71],"not":[72],"take":[73],"into":[74],"account":[75],"how":[76,233],"models":[79,177,238],"will":[80],"be":[81,188,241],"used":[82],"planning.":[84],"Options":[85],"are":[86,183],"typically":[87],"discovered":[88],"by":[89,205],"posing":[90],"subsidiary":[91],"tasks,":[92],"such":[93,180],"as":[94],"reaching":[95],"bottleneck":[97,199],"or":[99,201],"maximizing":[100],"cumulative":[102],"sum":[103],"sensory":[106],"signal":[107],"other":[108],"than":[109,192],"reward.":[110],"Each":[111],"subtask":[112],"solved":[114],"to":[115,131,187,224],"produce":[116],"an":[117],"option,":[118],"then":[120],"learned":[127,242],"available":[130],"process.":[134],"In":[135],"most":[136],"previous":[137],"work,":[138],"subtasks":[140,151,182,210],"ignore":[141],"reward":[143,156],"original":[146,155],"problem,":[147],"whereas":[148],"we":[149,231],"propose":[150],"use":[153],"plus":[157],"bonus":[159],"feature":[163],"at":[167],"time":[169],"terminates.":[172],"We":[173],"show":[174,232],"obtained":[178],"from":[179],"reward-respecting":[181,202],"much":[184],"more":[185],"likely":[186],"useful":[189],"eigenoptions,":[193],"shortest":[194],"path":[195],"states,":[200],"generated":[204],"option-critic.":[207],"Reward":[208],"respecting":[209],"strongly":[211],"constrain":[212],"thereby":[218],"also":[219],"provide":[220],"partial":[222],"solution":[223],"problem":[226],"discovery.":[229],"Finally,":[230],"values,":[234],"policies,":[235],"options,":[236],"can":[239],"all":[240],"online":[243],"off-policy":[245],"using":[246],"standard":[247],"algorithms":[248],"general":[250],"value":[251],"functions.":[252]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
