{"id":"https://openalex.org/W3166673791","doi":"https://doi.org/10.24963/ijcai.2021/299","title":"Reconciling Rewards with Predictive State Representations","display_name":"Reconciling Rewards with Predictive State Representations","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3166673791","doi":"https://doi.org/10.24963/ijcai.2021/299","mag":"3166673791"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/299","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/299","pdf_url":"https://www.ijcai.org/proceedings/2021/0299.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0299.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033275466","display_name":"Andrea Baisero","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrea Baisero","raw_affiliation_strings":["Northeastern University, Boston, MA","Northeastern University, Boston MA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Northeastern University, Boston MA#TAB#","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033129735","display_name":"Christopher Amato","orcid":"https://orcid.org/0000-0002-6786-7384"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]},{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX","US"],"is_corresponding":false,"raw_author_name":"Christopher Amato","raw_affiliation_strings":["Northeastern University, Boston, MA","Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06830311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2170","last_page":"2176"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.8930149078369141},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6632368564605713},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6551164388656616},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5980382561683655},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5929025411605835},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5180132985115051},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4807462692260742},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.44874662160873413},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4253080189228058},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39754435420036316},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.3874155282974243},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37836533784866333},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.27536261081695557},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24569198489189148},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1695554554462433},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08895987272262573}],"concepts":[{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.8930149078369141},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6632368564605713},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6551164388656616},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5980382561683655},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5929025411605835},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5180132985115051},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4807462692260742},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.44874662160873413},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4253080189228058},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39754435420036316},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.3874155282974243},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37836533784866333},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.27536261081695557},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24569198489189148},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1695554554462433},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08895987272262573},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2021/299","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/299","pdf_url":"https://www.ijcai.org/proceedings/2021/0299.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.03926","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.03926","pdf_url":"https://arxiv.org/pdf/2106.03926","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3166673791","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.03926v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.03926","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.03926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/299","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/299","pdf_url":"https://www.ijcai.org/proceedings/2021/0299.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G5027711510","display_name":"NSF-BSF: RI: Small: Decentralized Active Goal Recognition","funder_award_id":"1816382","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3166673791.pdf","grobid_xml":"https://content.openalex.org/works/W3166673791.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1491973539","https://openalex.org/W1497506444","https://openalex.org/W1499044922","https://openalex.org/W1540337045","https://openalex.org/W1565405003","https://openalex.org/W1640774615","https://openalex.org/W1827128995","https://openalex.org/W1987974668","https://openalex.org/W2034725503","https://openalex.org/W2042830238","https://openalex.org/W2099873296","https://openalex.org/W2111680853","https://openalex.org/W2123157758","https://openalex.org/W2123542217","https://openalex.org/W2154384352","https://openalex.org/W2158282517","https://openalex.org/W2163126463","https://openalex.org/W2166610875"],"related_works":["https://openalex.org/W3188824980","https://openalex.org/W202397308","https://openalex.org/W1565405003","https://openalex.org/W2058853801","https://openalex.org/W1512008671","https://openalex.org/W2111680853","https://openalex.org/W1827128995","https://openalex.org/W2183826014","https://openalex.org/W1568770747","https://openalex.org/W2137867909","https://openalex.org/W2140352872","https://openalex.org/W2186412383","https://openalex.org/W2273762088","https://openalex.org/W2331017","https://openalex.org/W2407775036","https://openalex.org/W2611988551","https://openalex.org/W2123372395","https://openalex.org/W2261692840","https://openalex.org/W2144041366","https://openalex.org/W2337917802"],"abstract_inverted_index":{"Predictive":[0],"state":[1,120],"representations":[2,121],"(PSRs)":[3],"are":[4],"models":[5,53,129,178],"of":[6,44,51,103,125,179],"controlled":[7],"non-Markov":[8],"observation":[9],"sequences":[10],"which":[11,46,69,127],"exhibit":[12],"the":[13,35,42,48,90,114,152,161],"same":[14],"generative":[15,177],"process":[16],"governing":[17],"POMDP":[18,79,149,170],"observations":[19,131,180],"without":[20],"relying":[21],"on":[22],"an":[23],"underlying":[24],"latent":[25],"state.":[26],"In":[27],"that":[28,83,99,142],"respect,":[29],"a":[30,63,72,100,107,123,145],"PSR":[31,52,73,154],"is":[32,74,93,144],"indistinguishable":[33],"from":[34,106,157],"corresponding":[36],"POMDP.":[37],"However,":[38],"PSRs":[39,126],"notoriously":[40],"ignore":[41],"notion":[43],"rewards,":[45,80,133],"undermines":[47],"general":[49],"utility":[50],"for":[54,138],"control,":[55],"planning,":[56],"or":[57],"reinforcement":[58],"learning.":[59],"Therefore,":[60],"we":[61,81,97],"describe":[62],"sufficient":[64],"and":[65,96,132,134,151,181],"necessary":[66],"accuracy":[67,91,115],"condition":[68,92],"determines":[70],"whether":[71],"able":[75],"to":[76],"accurately":[77,128],"model":[78],"show":[82,141],"rewards":[84],"can":[85],"be":[86],"approximated":[87],"even":[88],"when":[89],"not":[94,112],"satisfied,":[95],"find":[98],"non-trivial":[101],"number":[102],"POMDPs":[104],"taken":[105],"well-known":[108],"third-party":[109],"repository":[110],"do":[111],"satisfy":[113],"condition.":[116],"We":[117,140],"propose":[118],"reward-predictive":[119],"(R-PSRs),":[122],"generalization":[124],"both":[130],"develop":[135],"value":[136],"iteration":[137],"R-PSRs.":[139],"there":[143],"mismatch":[146],"between":[147],"optimal":[148,153,164,169],"policies":[150,155,166],"derived":[156],"approximate":[158],"rewards.":[159,182],"On":[160],"other":[162],"hand,":[163],"R-PSR":[165],"perfectly":[167],"match":[168],"policies,":[171],"reconfirming":[172],"R-PSRs":[173],"as":[174],"accurate":[175],"state-less":[176]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
