{"id":"https://openalex.org/W7127175413","doi":"https://doi.org/10.1007/s10458-026-09732-0","title":"Learning the value systems of agents with preference-based and inverse reinforcement learning","display_name":"Learning the value systems of agents with preference-based and inverse reinforcement learning","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127175413","doi":"https://doi.org/10.1007/s10458-026-09732-0"},"language":"en","primary_location":{"id":"doi:10.1007/s10458-026-09732-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09732-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09732-0.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09732-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039767300","display_name":"Andr\u00e9s Holgado-S\u00e1nchez","orcid":"https://orcid.org/0000-0001-8853-1022"},"institutions":[{"id":"https://openalex.org/I182083151","display_name":"Universidad Rey Juan Carlos","ror":"https://ror.org/01v5cv687","country_code":"ES","type":"education","lineage":["https://openalex.org/I182083151"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Andr\u00e9s Holgado-S\u00e1nchez","raw_affiliation_strings":["CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0001-8853-1022","affiliations":[{"raw_affiliation_string":"CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain","institution_ids":["https://openalex.org/I182083151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052621336","display_name":"Holger Billhardt","orcid":"https://orcid.org/0000-0001-8298-4178"},"institutions":[{"id":"https://openalex.org/I182083151","display_name":"Universidad Rey Juan Carlos","ror":"https://ror.org/01v5cv687","country_code":"ES","type":"education","lineage":["https://openalex.org/I182083151"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Holger Billhardt","raw_affiliation_strings":["CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0001-8298-4178","affiliations":[{"raw_affiliation_string":"CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain","institution_ids":["https://openalex.org/I182083151"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124810329","display_name":"Alberto Fern\u00e1ndez","orcid":null},"institutions":[{"id":"https://openalex.org/I182083151","display_name":"Universidad Rey Juan Carlos","ror":"https://ror.org/01v5cv687","country_code":"ES","type":"education","lineage":["https://openalex.org/I182083151"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Alberto Fern\u00e1ndez","raw_affiliation_strings":["CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-8962-6856","affiliations":[{"raw_affiliation_string":"CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain","institution_ids":["https://openalex.org/I182083151"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052274381","display_name":"Sascha Ossowsk\u00ed","orcid":"https://orcid.org/0000-0003-2483-9508"},"institutions":[{"id":"https://openalex.org/I182083151","display_name":"Universidad Rey Juan Carlos","ror":"https://ror.org/01v5cv687","country_code":"ES","type":"education","lineage":["https://openalex.org/I182083151"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Sascha Ossowski","raw_affiliation_strings":["CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0003-2483-9508","affiliations":[{"raw_affiliation_string":"CETINIA, Universidad Rey Juan Carlos, Tulip\u00e1n (Unnumbered), M\u00f3stoles, 28933, Madrid, Spain","institution_ids":["https://openalex.org/I182083151"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5039767300"],"corresponding_institution_ids":["https://openalex.org/I182083151"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19510761,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.20669999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.20669999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.13459999859333038,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.10790000110864639,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6373000144958496},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6302000284194946},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5727999806404114},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5134000182151794},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3847000002861023},{"id":"https://openalex.org/keywords/meaning","display_name":"Meaning (existential)","score":0.375},{"id":"https://openalex.org/keywords/software-agent","display_name":"Software agent","score":0.32199999690055847}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7422000169754028},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6373000144958496},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6302000284194946},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6208000183105469},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5727999806404114},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5134000182151794},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44839999079704285},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3847000002861023},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.375},{"id":"https://openalex.org/C5894958","wikidata":"https://www.wikidata.org/wiki/Q2297769","display_name":"Software agent","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.3009999990463257},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.25859999656677246},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s10458-026-09732-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09732-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09732-0.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2602.04518","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2602.04518","pdf_url":"https://arxiv.org/pdf/2602.04518","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550/arxiv.2602.04518","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1007/s10458-026-09732-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10458-026-09732-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10458-026-09732-0.pdf","source":{"id":"https://openalex.org/S5405189","display_name":"Autonomous Agents and Multi-Agent Systems","issn_l":"1387-2532","issn":["1387-2532","1573-7454"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Autonomous Agents and Multi-Agent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8112054467201233}],"awards":[{"id":"https://openalex.org/G1622782223","display_name":null,"funder_award_id":"MCIN/AEI/10","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G2262748287","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3681454997","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4042783231","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G451917667","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5003894964","display_name":null,"funder_award_id":"TED2021-131295B-C33","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G586857820","display_name":null,"funder_award_id":"501100011033/FEDER","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G5967599077","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6071709581","display_name":null,"funder_award_id":"13039/501100011033/","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6677760267","display_name":null,"funder_award_id":"PID2021-123673OB-C32","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7084143925","display_name":null,"funder_award_id":"AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7266728691","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320324496","display_name":"Universidad Rey Juan Carlos","ror":"https://ror.org/01v5cv687"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W7127175413.pdf"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W64088143","https://openalex.org/W1425161535","https://openalex.org/W1846353404","https://openalex.org/W1896027656","https://openalex.org/W1973063495","https://openalex.org/W1999874108","https://openalex.org/W2053170019","https://openalex.org/W2059946539","https://openalex.org/W2119659537","https://openalex.org/W2123791568","https://openalex.org/W2154023516","https://openalex.org/W2165643161","https://openalex.org/W2168055557","https://openalex.org/W2473622958","https://openalex.org/W2763110165","https://openalex.org/W2791436589","https://openalex.org/W2811434247","https://openalex.org/W2890441563","https://openalex.org/W2901707424","https://openalex.org/W2901792336","https://openalex.org/W2911981581","https://openalex.org/W2963177864","https://openalex.org/W2988680279","https://openalex.org/W3002093512","https://openalex.org/W3138984732","https://openalex.org/W3175257734","https://openalex.org/W3176471679","https://openalex.org/W3190455919","https://openalex.org/W3215312454","https://openalex.org/W4200635688","https://openalex.org/W4210389853","https://openalex.org/W4210585635","https://openalex.org/W4211192987","https://openalex.org/W4229632424","https://openalex.org/W4247462173","https://openalex.org/W4285419493","https://openalex.org/W4287266177","https://openalex.org/W4287692910","https://openalex.org/W4287855052","https://openalex.org/W4292545669","https://openalex.org/W4293083713","https://openalex.org/W4321392588","https://openalex.org/W4322485644","https://openalex.org/W4366817401","https://openalex.org/W4381733211","https://openalex.org/W4386473830","https://openalex.org/W4388912338","https://openalex.org/W4390215524","https://openalex.org/W4391451779","https://openalex.org/W4392305607","https://openalex.org/W4393147562","https://openalex.org/W4393147692","https://openalex.org/W4396685016","https://openalex.org/W4398161150","https://openalex.org/W4399380743","https://openalex.org/W4400105162","https://openalex.org/W4403578421","https://openalex.org/W4404792842","https://openalex.org/W4408665360","https://openalex.org/W4415428033","https://openalex.org/W4415428677","https://openalex.org/W4417093270","https://openalex.org/W7124244647"],"related_works":[],"abstract_inverted_index":{"Agreement":[0],"Technologies":[1],"refer":[2],"to":[3,23,25,47,50,68,100,117,135,150,175,195],"open":[4],"computer":[5],"systems":[6,34,120,154],"in":[7,21,35,45,108,112,132],"which":[8],"autonomous":[9],"software":[10,77],"agents":[11],"interact":[12],"with":[13,57],"one":[14],"another,":[15],"typically":[16],"on":[17,122,127,180],"behalf":[18],"of":[19,32,91,105,167],"humans,":[20],"order":[22,46],"come":[24],"mutually":[26],"acceptable":[27,49],"agreements.":[28],"With":[29],"the":[30,51,89,102,136,168],"advance":[31],"AI":[33],"recent":[36],"years,":[37],"it":[38,96],"has":[39],"become":[40],"apparent":[41],"that":[42],"such":[43],"agreements,":[44],"be":[48],"involved":[52],"parties,":[53],"must":[54],"remain":[55],"aligned":[56],"ethical":[58],"principles":[59],"and":[60,157,190,200,207],"moral":[61,93],"values.":[62,94],"However,":[63],"this":[64,143],"is":[65,97,205],"notoriously":[66],"difficult":[67],"ensure,":[69],"especially":[70],"as":[71,185,187],"different":[72,81],"human":[73,140,158],"users":[74],"(and":[75],"their":[76],"agents)":[78],"may":[79,86],"hold":[80],"value":[82,107,119,128,153,169,197,201],"systems,":[83],"i.e.":[84],"they":[85],"differently":[87],"weigh":[88],"importance":[90],"individual":[92],"Furthermore,":[95],"often":[98],"hard":[99],"specify":[101],"precise":[103],"meaning":[104],"a":[106,109,113,147,164],"particular":[110],"context":[111],"computational":[114],"manner.":[115],"Methods":[116],"estimate":[118],"based":[121,126,179],"human-engineered":[123],"specifications,":[124],"e.g.":[125],"surveys,":[129],"are":[130],"limited":[131],"scale":[133],"due":[134],"need":[137],"for":[138],"intense":[139],"moderation.":[141],"In":[142,160],"article,":[144],"we":[145,162],"propose":[146,163],"novel":[148],"method":[149],"automatically":[151],"learn":[152],"from":[155],"observations":[156],"demonstrations.":[159],"particular,":[161],"formal":[165],"model":[166],"system":[170],"learning":[171,193],"problem,":[172],"its":[173],"instantiation":[174],"sequential":[176],"decision-making":[177],"domains":[178],"multi-objective":[181],"Markov":[182],"decision":[183],"processes,":[184],"well":[186],"tailored":[188],"preference-based":[189],"inverse":[191],"reinforcement":[192],"algorithms":[194],"infer":[196],"grounding":[198],"functions":[199],"systems.":[202],"The":[203],"approach":[204],"illustrated":[206],"evaluated":[208],"by":[209],"two":[210],"simulated":[211],"use":[212],"cases.":[213]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-03T00:00:00"}
