{"id":"https://openalex.org/W4415903164","doi":"https://doi.org/10.24963/kr.2025/55","title":"Pushdown Reward Machines for Reinforcement Learning","display_name":"Pushdown Reward Machines for Reinforcement Learning","publication_year":2025,"publication_date":"2025-11-01","ids":{"openalex":"https://openalex.org/W4415903164","doi":"https://doi.org/10.24963/kr.2025/55"},"language":null,"primary_location":{"id":"doi:10.24963/kr.2025/55","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2025/55","pdf_url":"https://proceedings.kr.org/2025/55/kr2025-0055-varricchione-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentySecond International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://proceedings.kr.org/2025/55/kr2025-0055-varricchione-et-al.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092762573","display_name":"Giovanni Varricchione","orcid":"https://orcid.org/0000-0002-5466-9012"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Giovanni Varricchione","raw_affiliation_strings":["Utrecht Universiteit, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Utrecht Universiteit, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008606761","display_name":"Toryn Q. Klassen","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Toryn Q. Klassen","raw_affiliation_strings":["University of Toronto, Toronto, Canada","Vector Institute, Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]},{"raw_affiliation_string":"Vector Institute, Toronto, Canada","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064693586","display_name":"Natasha Alechina","orcid":"https://orcid.org/0000-0003-3306-9891"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]},{"id":"https://openalex.org/I7876267","display_name":"Open University of the Netherlands","ror":"https://ror.org/018dfmf50","country_code":"NL","type":"education","lineage":["https://openalex.org/I7876267"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Natasha Alechina","raw_affiliation_strings":["Open Universiteit, Heerlen, The Netherlands","Utrecht Universiteit, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Open Universiteit, Heerlen, The Netherlands","institution_ids":["https://openalex.org/I7876267"]},{"raw_affiliation_string":"Utrecht Universiteit, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028760842","display_name":"Mehdi Dastani","orcid":"https://orcid.org/0000-0002-4641-4087"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Mehdi Dastani","raw_affiliation_strings":["Utrecht Universiteit, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Utrecht Universiteit, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052831741","display_name":"Brian Logan","orcid":"https://orcid.org/0000-0003-0648-7107"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]},{"id":"https://openalex.org/I195460627","display_name":"University of Aberdeen","ror":"https://ror.org/016476m91","country_code":"GB","type":"education","lineage":["https://openalex.org/I195460627"]}],"countries":["GB","NL"],"is_corresponding":false,"raw_author_name":"Brian Logan","raw_affiliation_strings":["University of Aberdeen, Aberdeen, United Kingdom","Utrecht Universiteit, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Aberdeen, Aberdeen, United Kingdom","institution_ids":["https://openalex.org/I195460627"]},{"raw_affiliation_string":"Utrecht Universiteit, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041982650","display_name":"Sheila A. McIlraith","orcid":"https://orcid.org/0000-0003-4953-0945"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Sheila A. McIlraith","raw_affiliation_strings":["University of Toronto, Toronto, Canada","Vector Institute, Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]},{"raw_affiliation_string":"Vector Institute, Toronto, Canada","institution_ids":["https://openalex.org/I4210127509"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092762573"],"corresponding_institution_ids":["https://openalex.org/I193662353"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16560214,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"566","last_page":"576"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.7046999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.7046999931335449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.1137000024318695,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.05559999868273735,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.685699999332428},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.545799970626831},{"id":"https://openalex.org/keywords/constant","display_name":"Constant (computer programming)","score":0.4903999865055084},{"id":"https://openalex.org/keywords/pushdown-automaton","display_name":"Pushdown automaton","score":0.4754999876022339},{"id":"https://openalex.org/keywords/finite-state-machine","display_name":"Finite-state machine","score":0.412200003862381},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.4108999967575073},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.3939000070095062},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.38370001316070557},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.3790000081062317}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.692799985408783},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.685699999332428},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.545799970626831},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.4903999865055084},{"id":"https://openalex.org/C77944639","wikidata":"https://www.wikidata.org/wiki/Q751443","display_name":"Pushdown automaton","level":3,"score":0.4754999876022339},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41179999709129333},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4020000100135803},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.38370001316070557},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.3790000081062317},{"id":"https://openalex.org/C195818886","wikidata":"https://www.wikidata.org/wiki/Q5421724","display_name":"Expressive power","level":2,"score":0.36480000615119934},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C9395851","wikidata":"https://www.wikidata.org/wiki/Q177929","display_name":"Stack (abstract data type)","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.28949999809265137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C92117001","wikidata":"https://www.wikidata.org/wiki/Q6997829","display_name":"Nested word","level":5,"score":0.2797999978065491},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26420000195503235},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2632000148296356}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/kr.2025/55","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2025/55","pdf_url":"https://proceedings.kr.org/2025/55/kr2025-0055-varricchione-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentySecond International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/kr.2025/55","is_oa":true,"landing_page_url":"https://doi.org/10.24963/kr.2025/55","pdf_url":"https://proceedings.kr.org/2025/55/kr2025-0055-varricchione-et-al.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the TwentySecond International Conference on Principles of Knowledge Representation and Reasoning","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8284766523","display_name":null,"funder_award_id":"(NSERC)","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320319880","display_name":"Government of Canada","ror":"https://ror.org/010q4q527"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415903164.pdf","grobid_xml":"https://content.openalex.org/works/W4415903164.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reward":[0],"machines":[1,52,58],"(RMs)":[2],"are":[3],"automata":[4],"structures":[5],"that":[6,30,175],"encode":[7],"(non-Markovian)":[8],"reward":[9,17,51,57,68,82],"functions":[10],"for":[11,161,171],"reinforcement":[12],"learning":[13,164],"(RL).":[14],"RMs":[15],"can":[16,65,105,190],"any":[18],"behaviour":[19],"representable":[20,72,196],"in":[21,42,73,197],"regular":[22],"languages":[23,200],"and,":[24],"when":[25,126],"paired":[26],"with":[27,179],"RL":[28,173],"algorithms":[29,174],"exploit":[31],"RM":[32],"structure,":[33],"have":[34],"been":[35],"shown":[36],"to":[37,95,124,193],"significantly":[38],"improve":[39],"sample":[40],"efficiency":[41],"many":[43],"domains.":[44],"In":[45],"this":[46],"work,":[47],"we":[48,167],"present":[49],"pushdown":[50,62],"(pdRMs),":[53],"an":[54,169],"extension":[55],"of":[56,88,99,117,130,155],"based":[59],"on":[60],"deterministic":[61,74,198],"automata.":[63],"pdRMs":[64],"recognise":[66],"and":[67,102,137,157],"temporally":[69],"extended":[70],"behaviours":[71],"context-free":[75,199],"languages,":[76],"making":[77],"them":[78],"more":[79],"expressive":[80,153],"than":[81],"machines.":[83],"We":[84,120,146,181],"introduce":[85],"two":[86,128],"variants":[87],"pdRM-based":[89],"policies,":[90],"one":[91,103],"which":[92,104],"has":[93],"access":[94,107],"the":[96,100,108,118,127,141,152,162],"entire":[97],"stack":[98],"pdRM,":[101,136],"only":[106],"top":[109],"k":[110],"symbols":[111],"(for":[112,132],"a":[113,122,133],"given":[114,134],"constant":[115,138],"k)":[116,139],"stack.":[119],"propose":[121,168],"procedure":[123],"check":[125],"kinds":[129],"policies":[131],"environment,":[135],"achieve":[140],"same":[142],"optimal":[143],"state":[144],"values.":[145],"then":[147],"provide":[148],"theoretical":[149],"results":[150,160,186],"establishing":[151],"power":[154],"pdRMs,":[156],"space":[158],"complexity":[159],"proposed":[163],"problems.":[165],"Lastly,":[166],"approach":[170],"off-policy":[172],"exploits":[176],"counterfactual":[177],"experiences":[178],"pdRMs.":[180,202],"conclude":[182],"by":[183],"providing":[184],"experimental":[185],"showing":[187],"how":[188],"agents":[189],"be":[191],"trained":[192],"perform":[194],"tasks":[195],"using":[201]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-11-05T00:00:00"}
