{"id":"https://openalex.org/W7128493063","doi":"https://doi.org/10.48550/arxiv.2602.08690","title":"SoK: The Pitfalls of Deep Reinforcement Learning for Cybersecurity","display_name":"SoK: The Pitfalls of Deep Reinforcement Learning for Cybersecurity","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128493063","doi":"https://doi.org/10.48550/arxiv.2602.08690"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.08690","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125576200","display_name":"Shae McFadden","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"McFadden, Shae","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025289470","display_name":"Myles Foley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Foley, Myles","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125477001","display_name":"Elizabeth Bates","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bates, Elizabeth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003959400","display_name":"Ilias Tsingenopoulos","orcid":"https://orcid.org/0000-0002-7714-5238"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsingenopoulos, Ilias","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125504918","display_name":"Sanyam Vyas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vyas, Sanyam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125563641","display_name":"Vasilios Mavroudis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mavroudis, Vasilios","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080048241","display_name":"Chris Hicks","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hicks, Chris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5072597369","display_name":"Fabio Pierazzi","orcid":"https://orcid.org/0000-0002-1254-1758"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pierazzi, Fabio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5125576200"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.5322999954223633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.5322999954223633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.12309999763965607,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.11309999972581863,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bespoke","display_name":"Bespoke","score":0.8723000288009644},{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.5641000270843506},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5491999983787537},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.531499981880188},{"id":"https://openalex.org/keywords/sandbox","display_name":"Sandbox (software development)","score":0.42399999499320984},{"id":"https://openalex.org/keywords/cyber-threats","display_name":"Cyber threats","score":0.32989999651908875}],"concepts":[{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.8723000288009644},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6585000157356262},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.6248999834060669},{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.5641000270843506},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5491999983787537},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C167981075","wikidata":"https://www.wikidata.org/wiki/Q2667186","display_name":"Sandbox (software development)","level":2,"score":0.42399999499320984},{"id":"https://openalex.org/C3018725008","wikidata":"https://www.wikidata.org/wiki/Q4071928","display_name":"Cyber threats","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C182590292","wikidata":"https://www.wikidata.org/wiki/Q989632","display_name":"Network security","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C191267431","wikidata":"https://www.wikidata.org/wiki/Q911932","display_name":"Honeypot","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2741999924182892},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2669000029563904},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2669000029563904},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2587999999523163},{"id":"https://openalex.org/C2779395397","wikidata":"https://www.wikidata.org/wiki/Q15731404","display_name":"Malware analysis","level":3,"score":0.2513999938964844}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.08690","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.08690","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08690","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.08690","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8033332228660583}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(DRL)":[3],"has":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,62,116],"domains":[9],"requiring":[10],"sequential":[11],"decision-making,":[12],"motivating":[13],"its":[14],"application":[15],"to":[16,25,139],"cybersecurity":[17,47,65],"problems.":[18],"However,":[19],"transitioning":[20],"DRL":[21,63],"from":[22],"laboratory":[23],"simulations":[24],"bespoke":[26],"cyber":[27,119],"environments":[28],"can":[29],"introduce":[30],"numerous":[31],"issues.":[32],"This":[33],"is":[34],"further":[35],"exacerbated":[36],"by":[37],"the":[38,69,90,107,141],"often":[39],"adversarial,":[40],"non-stationary,":[41],"and":[42,54,78,95,125,146],"partially-observable":[43],"nature":[44],"of":[45,71,92,99,110,143],"most":[46],"tasks.":[48],"In":[49],"this":[50],"paper,":[51],"we":[52,88,132],"identify":[53],"systematize":[55],"11":[56],"methodological":[57],"pitfalls":[58,102,112],"that":[59],"frequently":[60],"occur":[61],"for":[64,136],"(DRL4Sec)":[66],"literature":[67],"across":[68],"stages":[70],"environment":[72],"modeling,":[73],"agent":[74],"training,":[75],"performance":[76],"evaluation,":[77],"system":[79],"deployment.":[80],"By":[81],"analyzing":[82],"66":[83],"significant":[84],"DRL4Sec":[85],"papers":[86],"(2018-2025),":[87],"quantify":[89],"prevalence":[91],"each":[93,137],"pitfall":[94,138],"find":[96],"an":[97],"average":[98],"over":[100],"five":[101],"per":[103],"paper.":[104],"We":[105],"demonstrate":[106],"practical":[108],"impact":[109],"these":[111],"using":[113],"controlled":[114],"experiments":[115],"(i)":[117],"autonomous":[118],"defense,":[120],"(ii)":[121],"adversarial":[122],"malware":[123],"creation,":[124],"(iii)":[126],"web":[127],"security":[128,149],"testing":[129],"environments.":[130],"Finally,":[131],"provide":[133],"actionable":[134],"recommendations":[135],"support":[140],"development":[142],"more":[144],"rigorous":[145],"deployable":[147],"DRL-based":[148],"systems.":[150]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-11T00:00:00"}
