{"id":"https://openalex.org/W4210389853","doi":"https://doi.org/10.1007/s10676-022-09635-0","title":"Instilling moral value alignment by means of multi-objective reinforcement learning","display_name":"Instilling moral value alignment by means of multi-objective reinforcement learning","publication_year":2022,"publication_date":"2022-01-24","ids":{"openalex":"https://openalex.org/W4210389853","doi":"https://doi.org/10.1007/s10676-022-09635-0"},"language":"en","primary_location":{"id":"doi:10.1007/s10676-022-09635-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-022-09635-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-022-09635-0.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10676-022-09635-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080841253","display_name":"Manel Rodr\u00edguez-Soto","orcid":"https://orcid.org/0000-0003-1339-2018"},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Manel Rodriguez-Soto","raw_affiliation_strings":["Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain"],"affiliations":[{"raw_affiliation_string":"Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain","institution_ids":["https://openalex.org/I4210131846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032451012","display_name":"Marc Serramia","orcid":"https://orcid.org/0000-0003-0993-024X"},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Marc Serramia","raw_affiliation_strings":["Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain"],"affiliations":[{"raw_affiliation_string":"Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain","institution_ids":["https://openalex.org/I4210131846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028762293","display_name":"Maite L\u00f3pez-S\u00e1nchez","orcid":"https://orcid.org/0000-0002-1838-5928"},"institutions":[{"id":"https://openalex.org/I71999127","display_name":"Universitat de Barcelona","ror":"https://ror.org/021018s57","country_code":"ES","type":"education","lineage":["https://openalex.org/I71999127"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Maite Lopez-Sanchez","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Barcelona, Gran Via de les Corts Catalanes, 585, 08007, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Barcelona, Gran Via de les Corts Catalanes, 585, 08007, Barcelona, Spain","institution_ids":["https://openalex.org/I71999127"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005383856","display_name":"Juan A. Rodr\u00edguez-Aguilar","orcid":"https://orcid.org/0000-0002-2940-6886"},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Juan Antonio Rodriguez-Aguilar","raw_affiliation_strings":["Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain"],"affiliations":[{"raw_affiliation_string":"Artificial intelligence research institute (IIIA-CSIC), Carrer de Can Planas, Campus de la UAB, 08193, Bellaterra, Spain","institution_ids":["https://openalex.org/I4210131846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080841253"],"corresponding_institution_ids":["https://openalex.org/I4210131846"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":3.0615,"has_fulltext":true,"cited_by_count":28,"citation_normalized_percentile":{"value":0.91885002,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"24","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12520","display_name":"Psychology of Moral and Emotional Judgment","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12520","display_name":"Psychology of Moral and Emotional Judgment","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8036297559738159},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7668240070343018},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7036705613136292},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.6267046332359314},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5325934290885925},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.45091044902801514},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.4248199760913849},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3694987893104553},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21441903710365295},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08670586347579956}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8036297559738159},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7668240070343018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7036705613136292},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.6267046332359314},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5325934290885925},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.45091044902801514},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.4248199760913849},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3694987893104553},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21441903710365295},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08670586347579956}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1007/s10676-022-09635-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-022-09635-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-022-09635-0.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},{"id":"pmh:oai:openaccess.city.ac.uk:31379","is_oa":false,"landing_page_url":"https://openaccess.city.ac.uk/view/creators_id/marc=2Eserramia-amoros.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4306401940","display_name":"City Research Online (City University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I180825142","host_organization_name":"City, University of London","host_organization_lineage":["https://openalex.org/I180825142"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:digital.csic.es:10261/282390","is_oa":true,"landing_page_url":"http://hdl.handle.net/10261/282390","pdf_url":null,"source":{"id":"https://openalex.org/S4306401639","display_name":"DIGITAL.CSIC (Spanish National Research Council (CSIC))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I134820265","host_organization_name":"Consejo Superior de Investigaciones Cient\u00edficas","host_organization_lineage":["https://openalex.org/I134820265"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"http://purl.org/coar/resource_type/c_6501"},{"id":"pmh:oai:diposit.ub.edu:2445/192920","is_oa":true,"landing_page_url":"https://hdl.handle.net/2445/192920","pdf_url":null,"source":{"id":"https://openalex.org/S4306401653","display_name":"Dip\u00f2sit Digital de la Universitat de Barcelona (Universitat de Barcelona)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71999127","host_organization_name":"Universitat de Barcelona","host_organization_lineage":["https://openalex.org/I71999127"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Articles publicats en revistes (Matem\u00e0tiques i Inform\u00e0tica)","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/s10676-022-09635-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-022-09635-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-022-09635-0.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2293209160","display_name":null,"funder_award_id":"2017 SGR 172","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G3843131709","display_name":null,"funder_award_id":"MISMIS PGC2018- 096212B-C33","funder_id":"https://openalex.org/F4320315062","funder_display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades"},{"id":"https://openalex.org/G4761200008","display_name":null,"funder_award_id":"CI-SUSTAIN (PID2019-104156GB-I00)","funder_id":"https://openalex.org/F4320315062","funder_display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades"},{"id":"https://openalex.org/G535402712","display_name":null,"funder_award_id":"LOGISTAR (H2020-769142)","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"},{"id":"https://openalex.org/G6885174309","display_name":null,"funder_award_id":"COREDEM (H2020-785907)","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"},{"id":"https://openalex.org/G7201911169","display_name":null,"funder_award_id":"2017 SGR 341","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G7640888206","display_name":null,"funder_award_id":"TAILOR (H2020-952215)","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"},{"id":"https://openalex.org/G7719371360","display_name":null,"funder_award_id":"FPU18/03387","funder_id":"https://openalex.org/F4320315062","funder_display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades"},{"id":"https://openalex.org/G7899581257","display_name":null,"funder_award_id":"AI4EU (H2020-825619)","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"},{"id":"https://openalex.org/G8240657013","display_name":null,"funder_award_id":"COMRIDI18-1-0010-02","funder_id":"https://openalex.org/F4320315062","funder_display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades"},{"id":"https://openalex.org/G8650240729","display_name":null,"funder_award_id":"Crowd4SDG (H2020-872944)","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"}],"funders":[{"id":"https://openalex.org/F4320315062","display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades","ror":null},{"id":"https://openalex.org/F4320321505","display_name":"Generalitat de Catalunya","ror":"https://ror.org/01bg62x04"},{"id":"https://openalex.org/F4320335254","display_name":"Horizon 2020","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4210389853.pdf","grobid_xml":"https://content.openalex.org/works/W4210389853.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W418559614","https://openalex.org/W653246875","https://openalex.org/W1589747210","https://openalex.org/W1595037444","https://openalex.org/W1845972764","https://openalex.org/W1999874108","https://openalex.org/W2012634103","https://openalex.org/W2025564061","https://openalex.org/W2107726111","https://openalex.org/W2118411866","https://openalex.org/W2130661992","https://openalex.org/W2141481921","https://openalex.org/W2240086230","https://openalex.org/W2321639160","https://openalex.org/W2410842990","https://openalex.org/W2508058976","https://openalex.org/W2508236932","https://openalex.org/W2565840370","https://openalex.org/W2566902129","https://openalex.org/W2571258182","https://openalex.org/W2606733399","https://openalex.org/W2794632992","https://openalex.org/W2803543472","https://openalex.org/W2823329128","https://openalex.org/W2911981581","https://openalex.org/W2950912723","https://openalex.org/W2960675728","https://openalex.org/W2963177864","https://openalex.org/W2964627913","https://openalex.org/W2973186106","https://openalex.org/W3002093512","https://openalex.org/W3002916673","https://openalex.org/W3036907156","https://openalex.org/W3037214912","https://openalex.org/W3037708944","https://openalex.org/W3105871743","https://openalex.org/W3111271797","https://openalex.org/W3115477807","https://openalex.org/W3169314969","https://openalex.org/W4210772669","https://openalex.org/W4238827160","https://openalex.org/W4244983688","https://openalex.org/W4251714114","https://openalex.org/W4403461050","https://openalex.org/W6621523278","https://openalex.org/W6629627883","https://openalex.org/W6677916085","https://openalex.org/W6718836005","https://openalex.org/W6749084171","https://openalex.org/W6774843429","https://openalex.org/W6857845870"],"related_works":["https://openalex.org/W3074294383","https://openalex.org/W4297949354","https://openalex.org/W2768698792","https://openalex.org/W4206669594","https://openalex.org/W2959276766","https://openalex.org/W2100369842","https://openalex.org/W4295941380","https://openalex.org/W260766989","https://openalex.org/W3139193008","https://openalex.org/W3111983280"],"abstract_inverted_index":{"Abstract":[0],"AI":[1],"research":[2],"is":[3,55,116],"being":[4],"challenged":[5],"with":[6,18,57],"ensuring":[7],"that":[8,105],"autonomous":[9],"agents":[10],"learn":[11,130],"to":[12,64,87,101],"behave":[13,88],"ethically,":[14],"namely":[15],"in":[16,79],"alignment":[17,31],"moral":[19,43],"values.":[20],"Here,":[21],"we":[22],"propose":[23],"a":[24,34,121,131],"novel":[25],"way":[26],"of":[27,60,68],"tackling":[28],"the":[29,58,66,111,125],"value":[30,46],"problem":[32],"as":[33],"two-step":[35,108],"process.":[36],"The":[37,75],"first":[38],"step":[39,77],"consists":[40,78],"on":[41,50,97],"formalising":[42],"values":[44],"and":[45,72],"aligned":[47],"behaviour":[48,115],"based":[49],"philosophical":[51],"foundations.":[52],"Our":[53],"formalisation":[54],"compatible":[56],"framework":[59],"(Multi-Objective)":[61],"Reinforcement":[62],"Learning,":[63],"ease":[65],"handling":[67],"an":[69,81,84,103],"agent\u2019s":[70],"individual":[71,93],"ethical":[73],"objectives.":[74],"second":[76],"designing":[80],"environment":[82,123],"wherein":[83,127],"agent":[85,126],"learns":[86],"ethically":[89],"while":[90],"pursuing":[91],"its":[92],"objective.":[94],"We":[95],"leverage":[96],"our":[98,107,118],"theoretical":[99],"results":[100],"introduce":[102],"algorithm":[104,119],"automates":[106],"approach.":[109],"In":[110],"cases":[112],"where":[113],"value-aligned":[114,132],"possible,":[117],"produces":[120],"learning":[122],"for":[124],"it":[128],"will":[129],"behaviour.":[133]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
