{"id":"https://openalex.org/W4415428141","doi":"https://doi.org/10.3233/faia251200","title":"OFTEN-DEEPRL: On-the-Fly Teaching of Ethical Norms to Deep Reinforcement Learning Agents","display_name":"OFTEN-DEEPRL: On-the-Fly Teaching of Ethical Norms to Deep Reinforcement Learning Agents","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428141","doi":"https://doi.org/10.3233/faia251200"},"language":null,"primary_location":{"id":"doi:10.3233/faia251200","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251200","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251200","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065480828","display_name":"Ignacio D. Lopez-Miguel","orcid":"https://orcid.org/0000-0002-8044-0385"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Ignacio D. Lopez-Miguel","raw_affiliation_strings":["TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at"],"affiliations":[{"raw_affiliation_string":"TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042662851","display_name":"Stefan Adam","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Sebastian Adam","raw_affiliation_strings":["TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at"],"affiliations":[{"raw_affiliation_string":"TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050836932","display_name":"Ezio Bartocci","orcid":"https://orcid.org/0000-0002-8004-6601"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Ezio Bartocci","raw_affiliation_strings":["TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at"],"affiliations":[{"raw_affiliation_string":"TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082178480","display_name":"Thomas Eiter","orcid":"https://orcid.org/0000-0001-6003-6345"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Thomas Eiter","raw_affiliation_strings":["TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at"],"affiliations":[{"raw_affiliation_string":"TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078635542","display_name":"Martin Tappler","orcid":"https://orcid.org/0000-0002-4193-5609"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Martin Tappler","raw_affiliation_strings":["TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at"],"affiliations":[{"raw_affiliation_string":"TU Wien, Vienna, Austria, ignacio.lopez@tuwien.ac.at, sebastian.adam@tuwien.ac.at, ezio.bartocci@tuwien.ac.at, thomas.eiter@tuwien.ac.at, martin.tappler@tuwien.ac.at","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5065480828"],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.54395501,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8119999766349792},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6520000100135803},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.555899977684021},{"id":"https://openalex.org/keywords/norm","display_name":"Norm (philosophy)","score":0.48510000109672546},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4260999858379364},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.40310001373291016}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8119999766349792},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6520000100135803},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.555899977684021},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5157999992370605},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.48510000109672546},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45320001244544983},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3222000002861023},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.2994000017642975},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.28299999237060547},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2709999978542328},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2685000002384186},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26820001006126404},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2612000107765198}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251200","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251200","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251200","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251200","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"AI":[0],"agents":[1,27,62,172],"trained":[2,63],"with":[3,37,45,64,154,174],"reinforcement":[4,66],"learning":[5],"(RL)":[6],"usually":[7],"focus":[8],"on":[9,77],"completing":[10],"their":[11,21,32,46],"intended":[12],"tasks":[13],"without":[14],"detours,":[15],"as":[16,126],"doing":[17],"so":[18],"typically":[19],"maximizes":[20],"reward.":[22],"However,":[23],"real-world":[24],"deployment":[25],"requires":[26],"that":[28,42,106,142],"not":[29],"only":[30],"achieve":[31,176],"goals":[33],"but":[34],"also":[35],"comply":[36],"ethical":[38,59],"and":[39,98,163],"societal":[40],"norms":[41,60],"may":[43],"conflict":[44],"learned":[47],"behavior.":[48],"In":[49,169],"this":[50],"work,":[51],"we":[52,101],"present":[53],"OFTEN-DEEPRL,":[54],"an":[55,73],"approach":[56,69,153],"to":[57],"integrate":[58],"into":[61],"deep":[65],"learning.":[67],"The":[68],"starts":[70],"by":[71],"training":[72,136],"RL":[74],"policy":[75,89,132],"focused":[76],"task":[78,178],"performance.":[79],"Building":[80],"upon":[81],"such":[82],"a":[83,94,103,119,160,164],"pre-trained":[84],"policy,":[85],"OFTEN-DEEPRL":[86,175],"adapts":[87],"the":[88,111,130,134,147],"through":[90],"norm-guided":[91,135],"training.":[92],"For":[93],"combination":[95],"of":[96,157],"observations":[97],"domain":[99],"knowledge,":[100],"employ":[102],"logic":[104],"program":[105],"generates":[107],"norm-compliant":[108],"plans":[109,124],"for":[110,128],"agent":[112],"using":[113],"answer":[114],"set":[115],"programming":[116],"(ASP)":[117],"within":[118],"given":[120],"planning":[121],"horizon.":[122],"These":[123],"serve":[125],"demonstrations":[127],"fine-tuning":[129],"agent\u2019s":[131],"in":[133],"phase,":[137],"guiding":[138],"it":[139],"toward":[140],"behavior":[141],"remains":[143],"effective":[144],"while":[145,180],"respecting":[146],"specified":[148],"norms.":[149],"We":[150],"validate":[151],"our":[152],"three":[155],"types":[156],"scenarios:":[158],"Pac-Man,":[159],"gardener":[161],"simulation,":[162],"SUMO-RL":[165],"traffic":[166],"control":[167],"scenario.":[168],"all":[170],"settings,":[171],"fine-tuned":[173],"comparable":[177],"performance":[179],"significantly":[181],"reducing":[182],"norm":[183],"violations.":[184]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
