{"id":"https://openalex.org/W4416962789","doi":"https://doi.org/10.1109/pst65910.2025.11268866","title":"Cyber Threat Mitigation with Knowledge-Infused Reinforcement Learning and LLM-Guided Policies","display_name":"Cyber Threat Mitigation with Knowledge-Infused Reinforcement Learning and LLM-Guided Policies","publication_year":2025,"publication_date":"2025-08-26","ids":{"openalex":"https://openalex.org/W4416962789","doi":"https://doi.org/10.1109/pst65910.2025.11268866"},"language":null,"primary_location":{"id":"doi:10.1109/pst65910.2025.11268866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pst65910.2025.11268866","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 22nd Annual International Conference on Privacy, Security, and Trust (PST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014508656","display_name":"Md. Shamim Towhid","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Md. Shamim Towhid","raw_affiliation_strings":["National Research Council,Canada"],"affiliations":[{"raw_affiliation_string":"National Research Council,Canada","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075654187","display_name":"Shahrear Iqbal","orcid":"https://orcid.org/0000-0001-7819-5715"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Shahrear Iqbal","raw_affiliation_strings":["National Research Council,Canada"],"affiliations":[{"raw_affiliation_string":"National Research Council,Canada","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049169545","display_name":"Euclides Carlos Pinto Neto","orcid":"https://orcid.org/0000-0002-1241-6391"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Euclides Carlos Pinto Neto","raw_affiliation_strings":["National Research Council,Canada"],"affiliations":[{"raw_affiliation_string":"National Research Council,Canada","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017149323","display_name":"Nashid Shahriar","orcid":"https://orcid.org/0000-0002-1101-6716"},"institutions":[{"id":"https://openalex.org/I194028371","display_name":"University of Regina","ror":"https://ror.org/03dzc0485","country_code":"CA","type":"education","lineage":["https://openalex.org/I194028371"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Nashid Shahriar","raw_affiliation_strings":["University of Regina,Department of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Regina,Department of Computer Science","institution_ids":["https://openalex.org/I194028371"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020247375","display_name":"Scott Buffett","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Scott Buffett","raw_affiliation_strings":["National Research Council,Canada"],"affiliations":[{"raw_affiliation_string":"National Research Council,Canada","institution_ids":["https://openalex.org/I4210159778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037744076","display_name":"Madeena Sultana","orcid":"https://orcid.org/0000-0002-9272-4326"},"institutions":[{"id":"https://openalex.org/I1297460800","display_name":"Defence Research and Development Canada","ror":"https://ror.org/00hgy8d33","country_code":"CA","type":"funder","lineage":["https://openalex.org/I1297460800","https://openalex.org/I1336338359","https://openalex.org/I2802286613"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Madeena Sultana","raw_affiliation_strings":["Defence Research and Development,Canada"],"affiliations":[{"raw_affiliation_string":"Defence Research and Development,Canada","institution_ids":["https://openalex.org/I1297460800"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033634735","display_name":"Adrian Taylor","orcid":"https://orcid.org/0000-0003-2701-9468"},"institutions":[{"id":"https://openalex.org/I1297460800","display_name":"Defence Research and Development Canada","ror":"https://ror.org/00hgy8d33","country_code":"CA","type":"funder","lineage":["https://openalex.org/I1297460800","https://openalex.org/I1336338359","https://openalex.org/I2802286613"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Adrian Taylor","raw_affiliation_strings":["Defence Research and Development,Canada"],"affiliations":[{"raw_affiliation_string":"Defence Research and Development,Canada","institution_ids":["https://openalex.org/I1297460800"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5014508656"],"corresponding_institution_ids":["https://openalex.org/I4210159778"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.52479742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.4749999940395355,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.4749999940395355,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.10869999974966049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.06629999727010727,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8571000099182129},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7276999950408936},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.47589999437332153},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.3707999885082245},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.33899998664855957},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.3310000002384186},{"id":"https://openalex.org/keywords/intelligent-agent","display_name":"Intelligent agent","score":0.3305000066757202}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8571000099182129},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7570000290870667},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7276999950408936},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4805000126361847},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.47589999437332153},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4334000051021576},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3707999885082245},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3310000002384186},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3009999990463257},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2606000006198883},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C179768478","wikidata":"https://www.wikidata.org/wiki/Q1120057","display_name":"Cyber-physical system","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pst65910.2025.11268866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pst65910.2025.11268866","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 22nd Annual International Conference on Privacy, Security, and Trust (PST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2112420033","https://openalex.org/W2591712613","https://openalex.org/W2758725343","https://openalex.org/W2789828921","https://openalex.org/W2892303285","https://openalex.org/W2902330413","https://openalex.org/W2973862992","https://openalex.org/W3132137315","https://openalex.org/W3192530604","https://openalex.org/W4210587242","https://openalex.org/W4382793227","https://openalex.org/W4385764309","https://openalex.org/W4386386840","https://openalex.org/W4387123889","https://openalex.org/W4388191536","https://openalex.org/W4392850107","https://openalex.org/W4393763811","https://openalex.org/W4399372403","https://openalex.org/W4402307712","https://openalex.org/W4402811916"],"related_works":[],"abstract_inverted_index":{"As":[0],"cyber":[1,12,30,235],"threats":[2],"continue":[3],"to":[4,49,70,119,126,199,205,228],"evolve,":[5],"there":[6],"is":[7,63,159,182],"a":[8,84,95,121,185,192,215],"need":[9],"for":[10,28,130,233],"autonomous":[11],"defense":[13,31],"(ACD)":[14],"strategies":[15,232],"capable":[16],"of":[17,108,123,155,255],"fast":[18],"and":[19,34,46,93,115,166],"context-aware":[20],"responses.":[21],"Reinforcement":[22],"learning":[23,35],"(RL)":[24],"has":[25],"shown":[26],"promise":[27],"automating":[29],"by":[32,59,220,249],"exploring":[33],"effective":[36,164],"countermeasures,":[37],"yet":[38],"it":[39,68],"often":[40,64],"struggles":[41],"with":[42,161],"sparse":[43],"reward":[44],"signals":[45],"insufficient":[47],"context":[48],"handle":[50],"diverse":[51],"attack":[52],"scenarios.":[53],"Furthermore,":[54],"the":[55,72,105,117,124,131,136,140,143,152,156,162,173,176,180,196,201,206,221,226,246],"convergence":[56],"time":[57],"taken":[58],"an":[60,109],"RL":[61,73,89,132,144,157,202,217,248],"agent":[62,74,158,227],"high,":[65],"which":[66],"makes":[67],"difficult":[69],"train":[71],"in":[75,148,195,253],"online":[76],"settings.":[77],"To":[78,209],"address":[79],"these":[80],"challenges,":[81],"we":[82,190,213],"propose":[83],"large":[85],"language":[86],"model":[87],"(LLM)-enhanced":[88],"method":[90],"that":[91,240],"builds":[92],"queries":[94],"knowledge":[96,107,137],"graph":[97],"(KG)":[98],"derived":[99],"from":[100,139,172,179],"agent-environment":[101],"interactions.":[102],"We":[103,134],"leverage":[104],"pre-trained":[106],"LLM":[110,118,141,181],"on":[111],"different":[112],"cybersecurity":[113],"frameworks":[114],"use":[116],"analyze":[120],"part":[122],"KG":[125],"generate":[127,229],"appropriate":[128],"actions":[129],"agent.":[133],"infuse":[135],"extracted":[138],"into":[142],"agent\u2019s":[145],"training":[146],"loop":[147],"two":[149],"ways.":[150],"First,":[151],"state":[153],"vector":[154],"augmented":[160],"most":[163],"action":[165,178],"its":[167],"corresponding":[168],"reward,":[169],"as":[170,184],"determined":[171],"KG.":[174],"Second,":[175],"suggested":[177],"used":[183],"reference":[186,207],"policy.":[187,208],"In":[188],"addition,":[189],"introduce":[191],"regularization":[193],"term":[194],"loss":[197],"function":[198],"make":[200],"policy":[203],"close":[204],"validate":[210],"our":[211,241],"approach,":[212],"develop":[214],"custom":[216],"environment":[218],"guided":[219],"MITRE":[222],"ATT&CK":[223],"framework,":[224],"enabling":[225],"tailored":[230],"mitigation":[231,258],"detected":[234],"attacks.":[236],"Experimental":[237],"results":[238],"show":[239],"proposed":[242],"approach":[243],"significantly":[244],"outperforms":[245],"baseline":[247],"over":[250],"$75":[251],"\\%$":[252],"terms":[254],"taking":[256],"better":[257],"actions.":[259]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-03T00:00:00"}
