{"id":"https://openalex.org/W7138413809","doi":"https://doi.org/10.1609/aaai.v40i2.37053","title":"DRMD: Deep Reinforcement Learning for Malware Detection Under Concept Drift","display_name":"DRMD: Deep Reinforcement Learning for Malware Detection Under Concept Drift","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138413809","doi":"https://doi.org/10.1609/aaai.v40i2.37053"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v40i2.37053","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i2.37053","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37053/41015","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37053/41015","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shae McFadden","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Shae McFadden","raw_affiliation_strings":["King's College London\nThe Alan Turing Institute\nUniversity College London"],"affiliations":[{"raw_affiliation_string":"King's College London\nThe Alan Turing Institute\nUniversity College London","institution_ids":["https://openalex.org/I4210128584","https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Myles Foley","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Myles Foley","raw_affiliation_strings":["The Alan Turing Institute"],"affiliations":[{"raw_affiliation_string":"The Alan Turing Institute","institution_ids":["https://openalex.org/I4210128584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mario D'Onghia","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mario D'Onghia","raw_affiliation_strings":["University College London"],"affiliations":[{"raw_affiliation_string":"University College London","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chris Hicks","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chris Hicks","raw_affiliation_strings":["The Alan Turing Institute"],"affiliations":[{"raw_affiliation_string":"The Alan Turing Institute","institution_ids":["https://openalex.org/I4210128584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Vasilios Mavroudis","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vasilios Mavroudis","raw_affiliation_strings":["The Alan Turing Institute"],"affiliations":[{"raw_affiliation_string":"The Alan Turing Institute","institution_ids":["https://openalex.org/I4210128584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nicola Paoletti","orcid":null},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nicola Paoletti","raw_affiliation_strings":["King's College London"],"affiliations":[{"raw_affiliation_string":"King's College London","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"last","author":{"id":null,"display_name":"Fabio Pierazzi","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fabio Pierazzi","raw_affiliation_strings":["University College London"],"affiliations":[{"raw_affiliation_string":"University College London","institution_ids":["https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210128584","https://openalex.org/I45129253"],"apc_list":null,"apc_paid":null,"fwci":93.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":1.0,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"40","issue":"2","first_page":"854","last_page":"862"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.8791000247001648,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.8791000247001648,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.0803999975323677,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/concept-drift","display_name":"Concept drift","score":0.7876999974250793},{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.7583000063896179},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6577000021934509},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.47110000252723694},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.39259999990463257},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.35670000314712524},{"id":"https://openalex.org/keywords/android-malware","display_name":"Android malware","score":0.35339999198913574}],"concepts":[{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.7876999974250793},{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.7583000063896179},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819000244140625},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6577000021934509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6189000010490417},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5536999702453613},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.47110000252723694},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.39259999990463257},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.35670000314712524},{"id":"https://openalex.org/C2989133298","wikidata":"https://www.wikidata.org/wiki/Q94","display_name":"Android malware","level":3,"score":0.35339999198913574},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C2779395397","wikidata":"https://www.wikidata.org/wiki/Q15731404","display_name":"Malware analysis","level":3,"score":0.3431999981403351},{"id":"https://openalex.org/C557433098","wikidata":"https://www.wikidata.org/wiki/Q94","display_name":"Android (operating system)","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2921999990940094},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.2851000130176544},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C2984634286","wikidata":"https://www.wikidata.org/wiki/Q1331926","display_name":"Decision process","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2632000148296356}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1609/aaai.v40i2.37053","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i2.37053","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37053/41015","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:kclpure.kcl.ac.uk:publications/bb00b610-a4c3-40e3-8237-355025ab5a40","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/bb00b610-a4c3-40e3-8237-355025ab5a40","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"McFadden, S, Foley, M, D'Onghia, M, Hicks, C, Mavroudis, V, Paoletti, N & Pierazzi, F 2026, DRMD : Deep Reinforcement Learning for Malware Detection under Concept Drift. in Proceedings of the AAAI Conference on Artificial Intelligence. 2 edn, vol. 40, Proceedings of the AAAI Conference on Artificial Intelligence, pp. 854-862. https://doi.org/10.1609/aaai.v40i2.37053","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2508.18839","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.18839","pdf_url":"https://arxiv.org/pdf/2508.18839","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i2.37053","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i2.37053","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37053/41015","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3681684530","display_name":"XAdv: Robust Explanations for Malware Detection","funder_award_id":"EP/X015971/2","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320332972","display_name":"Defence Science and Technology Laboratory","ror":"https://ror.org/04jswqb94"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320335334","display_name":"Defence Science and Technology Group","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138413809.pdf","grobid_xml":"https://content.openalex.org/works/W7138413809.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Malware":[0,118],"detection":[1,49,78,109,199],"in":[2,28,157,206],"real-world":[3],"settings":[4],"must":[5],"deal":[6],"with":[7,53],"evolving":[8],"threats,":[9],"limited":[10],"labeling":[11,44],"budgets,":[12],"and":[13,45,58,85,98,110,177,182,200],"uncertain":[14],"predictions.":[15],"Traditional":[16],"classifiers,":[17],"without":[18],"additional":[19],"mechanisms,":[20],"struggle":[21],"to":[22,39,42,61,130,152,163,203],"maintain":[23],"performance":[24,97,135,150],"under":[25,140],"concept":[26,66,164,204],"drift":[27,111,132,205],"malware":[29,48,77,127,198,212],"domains,":[30],"as":[31,79],"their":[32],"supervised":[33],"learning":[34,56,90],"formulation":[35,75],"cannot":[36],"optimize":[37],"when":[38],"defer":[40],"decisions":[41],"manual":[43,103],"adaptation.":[46],"Modern":[47],"pipelines":[50],"combine":[51],"classifiers":[52],"monthly":[54],"active":[55],"(AL)":[57],"rejection":[59],"mechanisms":[60],"mitigate":[62],"the":[63,107,116,158,167,180,190,207],"impact":[64],"of":[65,76,175,210],"drift.":[67,165],"In":[68],"this":[69],"work,":[70],"we":[71],"develop":[72],"a":[73,80,87,144],"novel":[74],"one-step":[81],"Markov":[82],"Decision":[83],"Process":[84],"train":[86],"deep":[88],"reinforcement":[89],"(DRL)":[91],"agent,":[92],"simultaneously":[93],"optimizing":[94],"sample":[95],"classification":[96,154],"rejecting":[99],"high-risk":[100],"samples":[101],"for":[102,179,189],"labeling.":[104],"We":[105],"evaluated":[106],"joint":[108],"mitigation":[112],"policy":[113],"learned":[114,139],"by":[115],"DRL-based":[117],"Detection":[119],"(DRMD)":[120],"agent":[121,169],"through":[122],"time-aware":[123],"evaluations":[124],"on":[125],"Android":[126,211],"datasets":[128],"subject":[129],"realistic":[131],"requiring":[133],"multi-year":[134],"stability.":[136],"The":[137],"policies":[138],"these":[141],"conditions":[142],"achieve":[143],"higher":[145],"Area":[146],"Under":[147],"Time":[148],"(AUT)":[149],"compared":[151],"standard":[153],"approaches":[155],"used":[156],"domain,":[159],"showing":[160],"improved":[161,201],"resilience":[162],"Specifically,":[166],"DRMD":[168],"achieved":[170],"an":[171],"average":[172],"AUT":[173],"improvement":[174],"8.66":[176],"10.90":[178],"classification-only":[181],"classification-rejection":[183],"policies,":[184],"respectively.":[185],"Our":[186],"results":[187],"demonstrate":[188],"first":[191],"time":[192],"that":[193],"DRL":[194],"can":[195],"facilitate":[196],"effective":[197],"resiliency":[202],"dynamic":[208],"setting":[209],"detection.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-12-10T00:00:00"}
