{"id":"https://openalex.org/W4402811680","doi":"https://doi.org/10.1109/csr61664.2024.10679438","title":"When Rewards Deceive: Counteracting Reward Poisoning on Online Deep Reinforcement Learning","display_name":"When Rewards Deceive: Counteracting Reward Poisoning on Online Deep Reinforcement Learning","publication_year":2024,"publication_date":"2024-09-02","ids":{"openalex":"https://openalex.org/W4402811680","doi":"https://doi.org/10.1109/csr61664.2024.10679438"},"language":"en","primary_location":{"id":"doi:10.1109/csr61664.2024.10679438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079172175","display_name":"Myria Bouhaddi","orcid":"https://orcid.org/0000-0002-9941-550X"},"institutions":[{"id":"https://openalex.org/I33217400","display_name":"Universit\u00e9 du Qu\u00e9bec en Outaouais","ror":"https://ror.org/011pqxa69","country_code":"CA","type":"education","lineage":["https://openalex.org/I33217400","https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Myria Bouhaddi","raw_affiliation_strings":["Computer Security Research Laboratory Universit&#x00E9; du Qu&#x00E9;bec en Outaouais,Quebec,Canada"],"affiliations":[{"raw_affiliation_string":"Computer Security Research Laboratory Universit&#x00E9; du Qu&#x00E9;bec en Outaouais,Quebec,Canada","institution_ids":["https://openalex.org/I33217400"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071452551","display_name":"Kamel Adi","orcid":"https://orcid.org/0000-0003-2869-0333"},"institutions":[{"id":"https://openalex.org/I33217400","display_name":"Universit\u00e9 du Qu\u00e9bec en Outaouais","ror":"https://ror.org/011pqxa69","country_code":"CA","type":"education","lineage":["https://openalex.org/I33217400","https://openalex.org/I49663120"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Kamel Adi","raw_affiliation_strings":["Computer Security Research Laboratory Universit&#x00E9; du Qu&#x00E9;bec en Outaouais,Quebec,Canada"],"affiliations":[{"raw_affiliation_string":"Computer Security Research Laboratory Universit&#x00E9; du Qu&#x00E9;bec en Outaouais,Quebec,Canada","institution_ids":["https://openalex.org/I33217400"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5079172175"],"corresponding_institution_ids":["https://openalex.org/I33217400"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16516983,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12520","display_name":"Psychology of Moral and Emotional Judgment","score":0.6473000049591064,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12520","display_name":"Psychology of Moral and Emotional Judgment","score":0.6473000049591064,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7675968408584595},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.616206705570221},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5433450937271118},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.40479809045791626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37630903720855713},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3552928566932678},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3488461375236511},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.1779150366783142}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7675968408584595},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.616206705570221},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5433450937271118},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.40479809045791626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37630903720855713},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3552928566932678},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3488461375236511},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1779150366783142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/csr61664.2024.10679438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csr61664.2024.10679438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Cyber Security and Resilience (CSR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2902907165","https://openalex.org/W2962763344","https://openalex.org/W2963454359","https://openalex.org/W2998116579","https://openalex.org/W3087391814","https://openalex.org/W3123348991","https://openalex.org/W4309353452","https://openalex.org/W4382318590","https://openalex.org/W4385163974","https://openalex.org/W4386214387","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6683195989","https://openalex.org/W6744991969","https://openalex.org/W6746973458","https://openalex.org/W6767528206","https://openalex.org/W6773640337","https://openalex.org/W6775422323","https://openalex.org/W6775850705","https://openalex.org/W6789484704","https://openalex.org/W6790267528","https://openalex.org/W6790439231","https://openalex.org/W6790547748","https://openalex.org/W6796689536","https://openalex.org/W6810144607"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(DRL)":[3],"agents":[4,129],"are":[5],"particularly":[6],"vulnerable":[7],"to":[8,17],"poisoning":[9],"attacks,":[10],"where":[11],"adversaries":[12],"subtly":[13],"manipulate":[14],"reward":[15,83],"signals":[16],"alter":[18],"agent":[19,66,73],"behavior":[20],"toward":[21],"a":[22,31,37,53,77,87,122],"specific":[23],"undesirable":[24],"policy.":[25],"In":[26],"response,":[27],"this":[28],"paper":[29],"introduces":[30],"novel":[32],"defense":[33],"mechanism":[34],"that":[35,81],"leverages":[36],"multi-environment":[38],"training":[39],"strategy,":[40],"significantly":[41],"enhancing":[42],"the":[43,60,64,104],"resilience":[44],"of":[45,107,131],"DRL":[46,65,117,128],"agents.":[47],"This":[48,119],"strategy":[49],"is":[50],"underpinned":[51],"by":[52,85],"non-cooperative":[54],"Bayesian":[55],"game":[56],"model,":[57],"which":[58,100],"captures":[59],"dynamic":[61],"interplay":[62],"between":[63],"and":[67],"its":[68,112],"attacker.":[69],"To":[70],"further":[71],"strengthen":[72],"defenses,":[74],"we":[75],"incorporate":[76],"variance-based":[78],"detection":[79],"method":[80],"identifies":[82],"manipulations":[84],"establishing":[86],"critical":[88],"decision":[89],"threshold.":[90],"Our":[91],"experimental":[92],"evaluation":[93],"involves":[94],"rigorous":[95],"testing":[96],"through":[97],"simulation-based":[98],"experiments,":[99],"validate":[101],"not":[102],"only":[103],"theoretical":[105],"robustness":[106],"our":[108],"approach,":[109],"but":[110],"also":[111],"practical":[113],"effectiveness":[114],"across":[115],"diverse":[116],"scenarios.":[118],"research":[120],"provides":[121],"comprehensive":[123],"blueprint":[124],"for":[125],"building":[126],"resilient":[127],"capable":[130],"maintaining":[132],"optimal":[133],"performance,":[134],"even":[135],"when":[136],"faced":[137],"with":[138],"sophisticated":[139],"adversarial":[140],"challenges.":[141]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
