{"id":"https://openalex.org/W7133488227","doi":"https://doi.org/10.1007/s10458-026-09736-w","title":"Generalized policy improvement for efficient and robust multi-objective reinforcement learning","display_name":"Generalized policy improvement for efficient and robust multi-objective reinforcement learning","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7133488227","doi":"https://doi.org/10.1007/s10458-026-09736-w"},"language":"en","primary_location":{"id":"doi:10.1007/s10458-026-09736-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09736-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09736-w.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09736-w.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025381800","display_name":"Lucas N. Alegre","orcid":"https://orcid.org/0000-0001-5465-4390"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]},{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE","BR"],"is_corresponding":true,"raw_author_name":"Lucas N. Alegre","raw_affiliation_strings":["Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","Institute of Informatics, Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"Institute of Informatics, Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029791521","display_name":"A L. C. BAZZAN","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Ana L. C. Bazzan","raw_affiliation_strings":["Institute of Informatics, Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics, Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081436755","display_name":"Diederik M. Roijers","orcid":"https://orcid.org/0000-0002-2825-2491"},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]},{"id":"https://openalex.org/I55106644","display_name":"Amsterdam University of Applied Sciences","ror":"https://ror.org/00y2z2s03","country_code":"NL","type":"education","lineage":["https://openalex.org/I55106644"]}],"countries":["BE","NL"],"is_corresponding":false,"raw_author_name":"Diederik M. Roijers","raw_affiliation_strings":["Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","Innovation Dept., City of Amsterdam, Amsterdam, the Netherlands"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"Innovation Dept., City of Amsterdam, Amsterdam, the Netherlands","institution_ids":["https://openalex.org/I55106644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128103403","display_name":"Ann Now\u00e9","orcid":null},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Ann Now\u00e9","raw_affiliation_strings":["Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Lab, Vrije Universiteit Brussel, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128043966","display_name":"Bruno C. da Silva","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bruno C. da Silva","raw_affiliation_strings":["CICS, University of Massachusetts, Amherst, MA, USA"],"affiliations":[{"raw_affiliation_string":"CICS, University of Massachusetts, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5025381800"],"corresponding_institution_ids":["https://openalex.org/I130442723","https://openalex.org/I13469542"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70979911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.689300000667572,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.689300000667572,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.10700000077486038,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.02250000089406967,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7681999802589417},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.5676000118255615},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5546000003814697},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5480999946594238},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5205000042915344},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4424000084400177},{"id":"https://openalex.org/keywords/monotonic-function","display_name":"Monotonic function","score":0.42969998717308044},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.42260000109672546}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7681999802589417},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6520000100135803},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.5676000118255615},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5629000067710876},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5546000003814697},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5480999946594238},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5205000042915344},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4424000084400177},{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.42969998717308044},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.42260000109672546},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3880999982357025},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37940001487731934},{"id":"https://openalex.org/C2777615720","wikidata":"https://www.wikidata.org/wiki/Q11888847","display_name":"Prioritization","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.3506999909877777},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.31690001487731934},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29010000824928284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2854999899864197},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10458-026-09736-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09736-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09736-w.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10458-026-09736-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09736-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09736-w.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7439017295837402}],"awards":[{"id":"https://openalex.org/G1950316668","display_name":null,"funder_award_id":"CAPES","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G2369280111","display_name":null,"funder_award_id":"2020/05165-1","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G2664932543","display_name":"Communication and machine learning in urban mobility: a multiagent and multiobjective approach","funder_award_id":"20/05165-1","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G3331681876","display_name":null,"funder_award_id":"CAPES","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G3601516360","display_name":null,"funder_award_id":"Brazil","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G3690646728","display_name":null,"funder_award_id":"00x0ma614","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G3883789297","display_name":null,"funder_award_id":"Code 00","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G3938906640","display_name":null,"funder_award_id":"(CAPES)","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G4600047395","display_name":null,"funder_award_id":"FAPESP","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G5079005330","display_name":null,"funder_award_id":"support","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G5139334624","display_name":null,"funder_award_id":"Coordena\u00e7\u00e3o","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G5696276328","display_name":null,"funder_award_id":"Finance","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G692611148","display_name":null,"funder_award_id":"Brazil","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G7592820750","display_name":null,"funder_award_id":"2020/05165-1","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G8464931132","display_name":null,"funder_award_id":"Process","funder_id":"https://openalex.org/F4320322025","funder_display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico"},{"id":"https://openalex.org/G864811111","display_name":null,"funder_award_id":"2/2021","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G8721624225","display_name":null,"funder_award_id":"140500/2021-9","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G8837013128","display_name":null,"funder_award_id":"Code 0","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"},{"id":"https://openalex.org/G963174003","display_name":null,"funder_award_id":"de Aperfei\u00e7","funder_id":"https://openalex.org/F4320321091","funder_display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior"}],"funders":[{"id":"https://openalex.org/F4320320997","display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo","ror":"https://ror.org/02ddkpn78"},{"id":"https://openalex.org/F4320321091","display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","ror":"https://ror.org/00x0ma614"},{"id":"https://openalex.org/F4320321730","display_name":"Fonds Wetenschappelijk Onderzoek","ror":"https://ror.org/03qtxy027"},{"id":"https://openalex.org/F4320321992","display_name":"Minist\u00e9rio da Ci\u00eancia, Tecnologia e Inova\u00e7\u00e3o","ror":"https://ror.org/050zdnc69"},{"id":"https://openalex.org/F4320322025","display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","ror":"https://ror.org/03swz6y49"},{"id":"https://openalex.org/F4320327336","display_name":"Vlaamse regering","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7133488227.pdf","grobid_xml":"https://content.openalex.org/works/W7133488227.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W2012612381","https://openalex.org/W2022923563","https://openalex.org/W2048226872","https://openalex.org/W2102660061","https://openalex.org/W2123516968","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2560398012","https://openalex.org/W2625286567","https://openalex.org/W2766447205","https://openalex.org/W2961186683","https://openalex.org/W2963582482","https://openalex.org/W2966234803","https://openalex.org/W2997289589","https://openalex.org/W3011215967","https://openalex.org/W3021613070","https://openalex.org/W3021855001","https://openalex.org/W3072315125","https://openalex.org/W3107153805","https://openalex.org/W4210870706","https://openalex.org/W4287266177","https://openalex.org/W4319441257","https://openalex.org/W4322729780","https://openalex.org/W4367311493","https://openalex.org/W4367311517","https://openalex.org/W4389508974","https://openalex.org/W4415796169","https://openalex.org/W6968906521","https://openalex.org/W7124314397","https://openalex.org/W7124350175","https://openalex.org/W7124596363","https://openalex.org/W7133199224","https://openalex.org/W7133214352","https://openalex.org/W7133234825"],"related_works":[],"abstract_inverted_index":{"Multi-objective":[0],"reinforcement":[1],"learning":[2],"(MORL)":[3],"algorithms":[4,21,237],"tackle":[5],"sequential":[6],"decision":[7],"problems":[8],"where":[9],"agents":[10],"may":[11],"have":[12],"different":[13,43],"preferences":[14,102],"over":[15],"(possibly":[16],"conflicting)":[17],"reward":[18],"functions.":[19],"These":[20,67],"often":[22],"learn":[23,97],"a":[24,31,47,104,123,135,163,190],"set":[25],"of":[26,126,154,195,206,225],"policies,":[27,212],"each":[28,87],"optimized":[29],"for":[30,42,99],"particular":[32],"agent":[33,75,101,142],"preference,":[34],"that":[35,50,63,165,197,215,231],"are":[36],"later":[37],"reused":[38],"when":[39],"optimizing":[40],"policies":[41,98,180],"preferences.":[44],"We":[45,109,160,228],"introduce":[46,162],"novel":[48,105,191],"algorithm":[49,112],"builds":[51],"upon":[52],"Generalized":[53],"Policy":[54],"Improvement":[55],"(GPI)":[56],"to":[57,69,83,96,115,118,173,202],"construct":[58],"principled,":[59],"formally-derived":[60],"prioritization":[61],"schemes":[62],"improve":[64],"sample":[65],"efficiency.":[66],"correspond":[68],"active-learning":[70],"strategies":[71],"by":[72,178,182],"which":[73],"the":[74,79,91,141,152,167,174,204,222,226],"can":[76,143],"identify":[77,145],"(i)":[78],"most":[80,92],"promising":[81],"preferences/objectives":[82],"train":[84],"on":[85],"at":[86],"moment;":[88],"and":[89,213],"(ii)":[90],"relevant":[93],"previous":[94],"experiences":[95],"new":[100],"through":[103],"Dyna-style":[106],"MORL":[107,236],"method.":[108],"prove":[110,214],"our":[111,183,232],"is":[113],"guaranteed":[114],"always":[116],"converge":[117],"an":[119,129],"optimal":[120,175],"solution":[121,133],"in":[122,210,238],"finite":[124],"number":[125],"steps,":[127],"or":[128],"$$\\epsilon":[130,137],"$$":[131,138],"-optimal":[132],"(for":[134],"bounded":[136],")":[139],"if":[140],"only":[144],"sub-optimal":[146],"policies.":[147],"Our":[148],"method":[149,184,233],"monotonically":[150],"improves":[151],"quality":[153],"its":[155],"partial":[156],"solutions":[157],"while":[158],"learning.":[159,186],"also":[161],"bound":[164],"characterizes":[166],"maximum":[168],"utility":[169],"loss":[170],"(with":[171],"respect":[172],"solution)":[176],"incurred":[177],"intermediate":[179],"identified":[181],"during":[185],"Finally,":[187],"we":[188],"propose":[189],"epistemic":[192],"uncertainty-aware":[193],"extension":[194],"GPI":[196,211],"exploits":[198],"high-confidence":[199],"lower":[200],"bounds":[201,220],"mitigate":[203],"impact":[205],"unreliable":[207],"action-value":[208],"estimates":[209],"it":[216],"provides":[217],"tighter":[218],"performance":[219],"than":[221],"current":[223],"state":[224],"art.":[227],"empirically":[229],"show":[230],"outperforms":[234],"state-of-the-art":[235],"challenging":[239],"multi-objective":[240],"tasks.":[241]},"counts_by_year":[],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2026-03-05T00:00:00"}
