{"id":"https://openalex.org/W4382317716","doi":"https://doi.org/10.1609/aaai.v37i12.26763","title":"Safe Policy Improvement for POMDPs via Finite-State Controllers","display_name":"Safe Policy Improvement for POMDPs via Finite-State Controllers","publication_year":2023,"publication_date":"2023-06-26","ids":{"openalex":"https://openalex.org/W4382317716","doi":"https://doi.org/10.1609/aaai.v37i12.26763"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v37i12.26763","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1609/aaai.v37i12.26763","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/26763/26535","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/26763/26535","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035570774","display_name":"Thiago D. Sim\u00e3o","orcid":"https://orcid.org/0000-0002-3568-9464"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Thiago D. Sim\u00e3o","raw_affiliation_strings":["Radboud University Nijmegen"],"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060649428","display_name":"Marnix Suilen","orcid":null},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Marnix Suilen","raw_affiliation_strings":["Radboud University Nijmegen"],"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012669709","display_name":"Nils Jansen","orcid":"https://orcid.org/0000-0003-1318-8973"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Nils Jansen","raw_affiliation_strings":["Radboud University Nijmegen"],"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen","institution_ids":["https://openalex.org/I145872427"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035570774"],"corresponding_institution_ids":["https://openalex.org/I145872427"],"apc_list":null,"apc_paid":null,"fwci":0.9849,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.74793814,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"37","issue":"12","first_page":"15109","last_page":"15117"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9402999877929688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9402999877929688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.9068386554718018},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7910767793655396},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.743095874786377},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7392619848251343},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.7010810971260071},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.6069808602333069},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5531972050666809},{"id":"https://openalex.org/keywords/finite-state","display_name":"Finite state","score":0.5102005004882812},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.48045605421066284},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.45530763268470764},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.3567180037498474},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.3100103735923767},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.2372998297214508},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2298767864704132},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18111732602119446},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.15014714002609253},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14972078800201416}],"concepts":[{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.9068386554718018},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7910767793655396},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.743095874786377},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7392619848251343},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.7010810971260071},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.6069808602333069},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5531972050666809},{"id":"https://openalex.org/C2983497884","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite state","level":3,"score":0.5102005004882812},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.48045605421066284},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.45530763268470764},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3567180037498474},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.3100103735923767},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.2372998297214508},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2298767864704132},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18111732602119446},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.15014714002609253},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14972078800201416},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v37i12.26763","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1609/aaai.v37i12.26763","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/26763/26535","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/294799","is_oa":true,"landing_page_url":"https://hdl.handle.net/2066/294799","pdf_url":"https://repository.ubn.ru.nl//bitstream/handle/2066/294799/294799.pdf","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article / Letter to editor"}],"best_oa_location":{"id":"doi:10.1609/aaai.v37i12.26763","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1609/aaai.v37i12.26763","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/26763/26535","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7599999904632568}],"awards":[{"id":"https://openalex.org/G1681793210","display_name":null,"funder_award_id":"KLEIN","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G2039356335","display_name":null,"funder_award_id":"1160.18.238","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G3914140545","display_name":null,"funder_award_id":"160.18.238","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G4371345817","display_name":null,"funder_award_id":"NWA.1160.18.238","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G5365867299","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G6009445997","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G641195207","display_name":"Provably Correct Policies for Uncertain Partially Observable Markov Decision Processes","funder_award_id":"OCENW.KLEIN.187","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4382317716.pdf"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W21934178","https://openalex.org/W201409579","https://openalex.org/W1524146404","https://openalex.org/W1555801537","https://openalex.org/W1591992921","https://openalex.org/W1640774615","https://openalex.org/W1647779468","https://openalex.org/W1701684472","https://openalex.org/W1835855307","https://openalex.org/W1931027396","https://openalex.org/W1977070092","https://openalex.org/W2011418219","https://openalex.org/W2034725503","https://openalex.org/W2058735307","https://openalex.org/W2068014951","https://openalex.org/W2087992130","https://openalex.org/W2107726111","https://openalex.org/W2168359464","https://openalex.org/W2171084228","https://openalex.org/W2438667436","https://openalex.org/W2472245206","https://openalex.org/W2545546008","https://openalex.org/W2604899404","https://openalex.org/W2809011908","https://openalex.org/W2904450917","https://openalex.org/W2963044034","https://openalex.org/W2963836326","https://openalex.org/W2965997659","https://openalex.org/W2971972375","https://openalex.org/W2998548529","https://openalex.org/W3011515502","https://openalex.org/W3022566517","https://openalex.org/W3037134744","https://openalex.org/W3038879850","https://openalex.org/W3088952667","https://openalex.org/W3093206925","https://openalex.org/W3094117125","https://openalex.org/W3134861104","https://openalex.org/W3166645952","https://openalex.org/W3169323432","https://openalex.org/W3215640983","https://openalex.org/W4213157992","https://openalex.org/W4214717370","https://openalex.org/W4225623389","https://openalex.org/W4281702291","https://openalex.org/W4287634477","https://openalex.org/W4298023569","https://openalex.org/W4382317716","https://openalex.org/W6635516443","https://openalex.org/W6637033844","https://openalex.org/W6640490175","https://openalex.org/W6676007687","https://openalex.org/W6677916085","https://openalex.org/W6684973485","https://openalex.org/W6736219079","https://openalex.org/W7001894244"],"related_works":["https://openalex.org/W2999848267","https://openalex.org/W2096013579","https://openalex.org/W1589140671","https://openalex.org/W1760611253","https://openalex.org/W52153049","https://openalex.org/W2951545791","https://openalex.org/W1515117609","https://openalex.org/W2294884454","https://openalex.org/W3169161914","https://openalex.org/W185460758"],"abstract_inverted_index":{"We":[0,132,187],"study":[1],"safe":[2],"policy":[3,36,67,104,151,163,175,181,205],"improvement":[4],"(SPI)":[5],"for":[6,92,198],"partially":[7],"observable":[8,127],"Markov":[9],"decision":[10],"processes":[11],"(POMDPs).":[12],"SPI":[13,47,90,155,159],"is":[14,81,109,185,227],"an":[15,29,69,149,153],"offline":[16,70],"reinforcement":[17],"learning":[18],"(RL)":[19],"problem":[20,91],"that":[21,37,78,96,106,171,189],"assumes":[22],"access":[23,51],"to":[24,52,61,88,111,119,123,166],"(1)":[25],"historical":[26,139],"data":[27,41,140,184],"about":[28],"environment,":[30],"and":[31,59,105,141,147],"(2)":[32],"the":[33,45,56,65,75,79,89,102,121,129,138,142,145,162,167,172,179,199,203,216,219],"so-called":[34],"behavior":[35,66,103,180,204],"previously":[38],"generated":[39],"this":[40,134,190],"by":[42,136],"interacting":[43],"with":[44,206],"environment.":[46],"methods":[48,73],"neither":[49],"require":[50],"a":[53,97,124,195],"model":[54],"nor":[55],"environment":[57,80],"itself,":[58],"aim":[60],"reliably":[62],"improve":[63],"upon":[64],"in":[68,222],"manner.":[71],"Existing":[72],"make":[74],"strong":[76],"assumption":[77,116],"fully":[82,126],"observable.":[83],"In":[84],"our":[85],"novel":[86],"approach":[87],"POMDPs,":[93],"we":[94],"assume":[95],"finite-state":[98,125],"controller":[99],"(FSC)":[100],"represents":[101],"finite":[107,225],"memory":[108,143,226],"sufficient":[110,183],"derive":[112],"optimal":[113],"policies.":[114],"This":[115],"allows":[117],"us":[118],"map":[120],"POMDP":[122],"MDP,":[128],"history":[130],"MDP.":[131],"estimate":[133],"MDP":[135],"combining":[137],"of":[144,218],"FSC,":[146],"compute":[148],"improved":[150],"using":[152],"off-the-shelf":[154],"algorithm.":[156],"The":[157],"underlying":[158],"method":[160],"constrains":[161],"space":[164],"according":[165],"available":[168],"data,":[169],"such":[170],"newly":[173],"computed":[174],"only":[176],"differs":[177],"from":[178],"when":[182],"available.":[186],"show":[188,215],"new":[191,196],"policy,":[192],"converted":[193],"into":[194],"FSC":[197],"(unknown)":[200],"POMDP,":[201],"outperforms":[202],"high":[207],"probability.":[208],"Experimental":[209],"results":[210],"on":[211],"several":[212],"well-established":[213],"benchmarks":[214],"applicability":[217],"approach,":[220],"even":[221],"cases":[223],"where":[224],"not":[228],"sufficient.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
