{"id":"https://openalex.org/W3188288352","doi":"https://doi.org/10.24963/ijcai.2021/76","title":"Multi-Objective Reinforcement Learning for Designing Ethical Environments","display_name":"Multi-Objective Reinforcement Learning for Designing Ethical Environments","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W3188288352","doi":"https://doi.org/10.24963/ijcai.2021/76","mag":"3188288352"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/76","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/76","pdf_url":"https://www.ijcai.org/proceedings/2021/0076.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0076.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080841253","display_name":"Manel Rodr\u00edguez-Soto","orcid":"https://orcid.org/0000-0003-1339-2018"},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Manel Rodriguez-Soto","raw_affiliation_strings":["Artificial Intelligence Research Institute (IIIA-CSIC), Bellaterra, Spain"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Research Institute (IIIA-CSIC), Bellaterra, Spain","institution_ids":["https://openalex.org/I4210131846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028762293","display_name":"Maite L\u00f3pez-S\u00e1nchez","orcid":"https://orcid.org/0000-0002-1838-5928"},"institutions":[{"id":"https://openalex.org/I71999127","display_name":"Universitat de Barcelona","ror":"https://ror.org/021018s57","country_code":"ES","type":"education","lineage":["https://openalex.org/I71999127"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Maite Lopez-Sanchez","raw_affiliation_strings":["Universitat de Barcelona (UB), Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Universitat de Barcelona (UB), Barcelona, Spain","institution_ids":["https://openalex.org/I71999127"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005383856","display_name":"Juan A. Rodr\u00edguez-Aguilar","orcid":"https://orcid.org/0000-0002-2940-6886"},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Juan A. Rodriguez Aguilar","raw_affiliation_strings":["Artificial Intelligence Research Institute (IIIA-CSIC), Bellaterra, Spain"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Research Institute (IIIA-CSIC), Bellaterra, Spain","institution_ids":["https://openalex.org/I4210131846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080841253"],"corresponding_institution_ids":["https://openalex.org/I4210131846"],"apc_list":null,"apc_paid":null,"fwci":1.8195,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.87867598,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"545","last_page":"551"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9578999876976013,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.824967086315155},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7133444547653198},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.669122040271759},{"id":"https://openalex.org/keywords/headway","display_name":"Headway","score":0.49583038687705994},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4525560140609741},{"id":"https://openalex.org/keywords/autonomous-agent","display_name":"Autonomous agent","score":0.44732967019081116},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.4306902289390564},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4231032431125641},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.3578626811504364},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.1508389413356781}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.824967086315155},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7133444547653198},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.669122040271759},{"id":"https://openalex.org/C2779240695","wikidata":"https://www.wikidata.org/wiki/Q4383682","display_name":"Headway","level":2,"score":0.49583038687705994},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4525560140609741},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.44732967019081116},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.4306902289390564},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4231032431125641},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3578626811504364},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.1508389413356781}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.24963/ijcai.2021/76","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/76","pdf_url":"https://www.ijcai.org/proceedings/2021/0076.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:diposit.ub.edu:2445/217805","is_oa":false,"landing_page_url":"https://hdl.handle.net/2445/217805","pdf_url":null,"source":{"id":"https://openalex.org/S4306401653","display_name":"Dip\u00f2sit Digital de la Universitat de Barcelona (Universitat de Barcelona)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71999127","host_organization_name":"Universitat de Barcelona","host_organization_lineage":["https://openalex.org/I71999127"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Comunicacions a congressos  (Matem\u00e0tiques i Inform\u00e0tica)","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/76","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/76","pdf_url":"https://www.ijcai.org/proceedings/2021/0076.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3827193553","display_name":null,"funder_award_id":"952215","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G465331839","display_name":null,"funder_award_id":"Crowd4SDG (H2020-872944)","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G5163439141","display_name":null,"funder_award_id":"825619","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G5175525644","display_name":null,"funder_award_id":"PID2019","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G5237975905","display_name":null,"funder_award_id":"H2020-872944","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G54156955","display_name":null,"funder_award_id":"COREDEM (H2020-785907)","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G5464936042","display_name":null,"funder_award_id":"872944","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G6084504639","display_name":null,"funder_award_id":"TAILOR (H2020-952215)","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G6235973325","display_name":null,"funder_award_id":"769142","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G6607577419","display_name":null,"funder_award_id":"785907","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G8116750037","display_name":null,"funder_award_id":"H2020-785907","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3188288352.pdf","grobid_xml":"https://content.openalex.org/works/W3188288352.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1595037444","https://openalex.org/W1866745826","https://openalex.org/W2058432138","https://openalex.org/W2141481921","https://openalex.org/W2240086230","https://openalex.org/W2462906003","https://openalex.org/W2508058976","https://openalex.org/W2508236932","https://openalex.org/W2565840370","https://openalex.org/W2566902129","https://openalex.org/W2762221702","https://openalex.org/W2768908787","https://openalex.org/W2772337934","https://openalex.org/W2794632992","https://openalex.org/W2823329128","https://openalex.org/W2960675728","https://openalex.org/W2963177864","https://openalex.org/W2964616600","https://openalex.org/W2964627913","https://openalex.org/W3037708944","https://openalex.org/W3103262232","https://openalex.org/W4214717370","https://openalex.org/W4238827160","https://openalex.org/W4300427736"],"related_works":["https://openalex.org/W1990084320","https://openalex.org/W3011309105","https://openalex.org/W2363400661","https://openalex.org/W2051775676","https://openalex.org/W2140196366","https://openalex.org/W44667219","https://openalex.org/W4313172028","https://openalex.org/W2768698792","https://openalex.org/W4206233823","https://openalex.org/W2494202692"],"abstract_inverted_index":{"AI":[0],"research":[1],"is":[2,31,79],"being":[3],"challenged":[4],"with":[5,17],"ensuring":[6],"that":[7,35,54,82,131],"autonomous":[8],"agents":[9,37],"learn":[10,58],"to":[11,32,38,42,59,86,106,127],"behave":[12,39,60,87],"ethically,":[13],"namely":[14],"in":[15],"alignment":[16],"moral":[18],"values.":[19],"A":[20],"common":[21],"approach,":[22],"founded":[23],"on":[24,123],"the":[25,43,99,108,133],"exploitation":[26],"of":[27,45,74,102,110,135],"Reinforcement":[28,104],"Learning":[29,105],"techniques,":[30],"design":[33,134],"environments":[34,76],"incentivise":[36],"ethically.":[40,61],"However,":[41],"best":[44],"our":[46,124],"knowledge,":[47],"current":[48],"approaches":[49],"do":[50],"not":[51],"theoretically":[52],"guarantee":[53],"an":[55,83,111,129],"agent":[56,84],"will":[57],"Here,":[62],"we":[63,121],"make":[64],"headway":[65],"along":[66],"this":[67],"direction":[68],"by":[69],"proposing":[70],"a":[71,118],"novel":[72],"way":[73],"designing":[75],"wherein":[77],"it":[78],"formally":[80],"guaranteed":[81],"learns":[85],"ethically":[88],"while":[89],"pursuing":[90],"its":[91],"individual":[92,113],"objectives.":[93,116],"Our":[94],"theoretical":[95,125],"results":[96,126],"develop":[97],"within":[98],"formal":[100],"framework":[101],"Multi-Objective":[103],"ease":[107],"handling":[109],"agent's":[112],"and":[114],"ethical":[115,136],"As":[117],"further":[119],"contribution,":[120],"leverage":[122],"introduce":[128],"algorithm":[130],"automates":[132],"environments.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":5}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
