{"id":"https://openalex.org/W4414605241","doi":"https://doi.org/10.1109/qrs65678.2025.00017","title":"Defending Llms Against Jailbreak Prompts Through Key Information Protection and Selective Compression","display_name":"Defending Llms Against Jailbreak Prompts Through Key Information Protection and Selective Compression","publication_year":2025,"publication_date":"2025-07-16","ids":{"openalex":"https://openalex.org/W4414605241","doi":"https://doi.org/10.1109/qrs65678.2025.00017"},"language":"en","primary_location":{"id":"doi:10.1109/qrs65678.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/qrs65678.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 25th International Conference on Software Quality, Reliability and Security (QRS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100436441","display_name":"Siyu Li","orcid":"https://orcid.org/0000-0003-4960-190X"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Siyu Li","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038150414","display_name":"Yu Zhou","orcid":"https://orcid.org/0000-0002-3723-7584"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhou","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115028456","display_name":"Xiangyu Zhang","orcid":"https://orcid.org/0009-0000-6271-746X"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zhang","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics,Nanjing,Jiangsu,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100696341","display_name":"Tingting Han","orcid":"https://orcid.org/0000-0001-5648-9624"},"institutions":[{"id":"https://openalex.org/I98259816","display_name":"Birkbeck, University of London","ror":"https://ror.org/02mb95055","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I98259816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tingting Han","raw_affiliation_strings":["Birkbeck, University of London,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Birkbeck, University of London,London,United Kingdom","institution_ids":["https://openalex.org/I98259816"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100436441"],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36334353,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"58","last_page":"67"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12705","display_name":"Educational Reforms and Innovations","score":0.715499997138977,"subfield":{"id":"https://openalex.org/subfields/2307","display_name":"Health, Toxicology and Mutagenesis"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12705","display_name":"Educational Reforms and Innovations","score":0.715499997138977,"subfield":{"id":"https://openalex.org/subfields/2307","display_name":"Health, Toxicology and Mutagenesis"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6044999957084656},{"id":"https://openalex.org/keywords/threat-model","display_name":"Threat model","score":0.54830002784729},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.483599990606308},{"id":"https://openalex.org/keywords/information-security","display_name":"Information security","score":0.48030000925064087},{"id":"https://openalex.org/keywords/information-protection-policy","display_name":"Information protection policy","score":0.41850000619888306},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.41130000352859497},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.40049999952316284},{"id":"https://openalex.org/keywords/compromise","display_name":"Compromise","score":0.3831000030040741},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.37720000743865967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7515000104904175},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.7458000183105469},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6044999957084656},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.54830002784729},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.483599990606308},{"id":"https://openalex.org/C527648132","wikidata":"https://www.wikidata.org/wiki/Q189900","display_name":"Information security","level":2,"score":0.48030000925064087},{"id":"https://openalex.org/C168297262","wikidata":"https://www.wikidata.org/wiki/Q6031182","display_name":"Information protection policy","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.40720000863075256},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C46355384","wikidata":"https://www.wikidata.org/wiki/Q726686","display_name":"Compromise","level":2,"score":0.3831000030040741},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C137822555","wikidata":"https://www.wikidata.org/wiki/Q2587068","display_name":"Information sensitivity","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C121822524","wikidata":"https://www.wikidata.org/wiki/Q5157582","display_name":"Computer security model","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C154908896","wikidata":"https://www.wikidata.org/wiki/Q2167404","display_name":"Security policy","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C195518309","wikidata":"https://www.wikidata.org/wiki/Q13424265","display_name":"Security testing","level":5,"score":0.290800005197525},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C169093310","wikidata":"https://www.wikidata.org/wiki/Q3702971","display_name":"Personally identifiable information","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C29852176","wikidata":"https://www.wikidata.org/wiki/Q373338","display_name":"Critical infrastructure","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C65856478","wikidata":"https://www.wikidata.org/wiki/Q3991682","display_name":"Attack model","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C2778717966","wikidata":"https://www.wikidata.org/wiki/Q4189076","display_name":"Protection mechanism","level":3,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/qrs65678.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/qrs65678.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 25th International Conference on Software Quality, Reliability and Security (QRS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1965555277","https://openalex.org/W2122825543","https://openalex.org/W2963339397","https://openalex.org/W3184008713","https://openalex.org/W4210499321","https://openalex.org/W4387869031","https://openalex.org/W4389520043","https://openalex.org/W4389523893","https://openalex.org/W4401043746","https://openalex.org/W4402671676","https://openalex.org/W4407989189","https://openalex.org/W4410090047","https://openalex.org/W4410609100","https://openalex.org/W4412888250"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,10,39,44,58,63,113,132,146,160,182],"widespread":[2],"application":[3],"of":[4,12,43,62,115,134],"Large":[5],"Language":[6],"Models":[7],"(LLMs)":[8],"in":[9,163,180],"field":[11],"natural":[13],"language":[14],"processing":[15],"and":[16,60,74,83,119,158,186,194],"software":[17,65],"engineering,":[18],"security":[19,34,41,61],"vulnerabilities":[20],"have":[21],"emerged":[22],"as":[23,36],"a":[24,32,94,135],"critical":[25,124,151],"concern.":[26],"Among":[27],"these,":[28],"jailbreak":[29,128,156,188],"attacks":[30,129,166],"represent":[31],"prevalent":[33],"threat,":[35],"they":[37],"bypass":[38],"internal":[40],"checks":[42],"model":[45,169,183],"through":[46],"carefully":[47],"designed":[48],"input":[49],"perturbations,":[50],"generating":[51],"malicious":[52],"outputs":[53],"which":[54],"severely":[55],"may":[56],"compromise":[57],"reliability":[59],"LLM-based":[64],"tools.":[66],"Existing":[67],"defense":[68,96,178],"methods":[69,179],"based":[70],"on":[71],"reinforcement":[72],"learning":[73],"fine-tuning":[75],"often":[76],"suffer":[77],"from":[78],"limited":[79],"generalization,":[80],"low":[81],"interpretability,":[82],"high":[84],"computational":[85],"overhead.":[86],"To":[87],"address":[88],"these":[89,150],"limitations,":[90],"we":[91],"propose":[92],"MaskedDefender,":[93],"novel":[95],"approach":[97,154],"that":[98,174],"detects":[99],"potential":[100],"attack":[101],"features":[102],"by":[103,112,130],"analyzing":[104],"model's":[105,147,161],"response":[106],"differences":[107],"to":[108,144,184],"various":[109],"inputs.":[110],"Guided":[111],"principle":[114],"key":[116],"information":[117],"protection":[118],"selective":[120],"compression,":[121],"MaskedDefender":[122,175],"identifies":[123],"tokens":[125],"associated":[126],"with":[127],"optimizing":[131],"gradient":[133],"multi-objective":[136],"loss":[137],"function.":[138],"It":[139],"then":[140],"applies":[141],"soft":[142],"guidance":[143],"steer":[145],"attention":[148],"toward":[149],"tokens.":[152],"Our":[153],"highlights":[155],"intentions":[157],"reduces":[159],"confusion":[162],"identifying":[164],"such":[165],"without":[167],"modifying":[168],"parameters.":[170],"Experimental":[171],"results":[172],"show":[173],"outperforms":[176],"existing":[177],"enabling":[181],"detect":[185],"resist":[187],"attacks,":[189],"while":[190],"maintaining":[191],"both":[192],"efficiency":[193],"effectiveness.":[195]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
