{"id":"https://openalex.org/W4411113449","doi":"https://doi.org/10.18653/v1/2025.findings-naacl.16","title":"RePD: Defending Jailbreak Attack through a Retrieval-based Prompt Decomposition Process","display_name":"RePD: Defending Jailbreak Attack through a Retrieval-based Prompt Decomposition Process","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4411113449","doi":"https://doi.org/10.18653/v1/2025.findings-naacl.16"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-naacl.16","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.16","pdf_url":"https://aclanthology.org/2025.findings-naacl.16.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-naacl.16.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078821299","display_name":"Peiran Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peiran Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017254786","display_name":"Xiaogeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaogeng Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5005843046","display_name":"Chaowei Xiao","orcid":"https://orcid.org/0000-0002-7043-4926"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chaowei Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.7946,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98176076,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"283","last_page":"294"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7542742490768433},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6162012815475464},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5653271079063416},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40806710720062256},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.23922434449195862}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7542742490768433},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6162012815475464},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5653271079063416},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40806710720062256},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.23922434449195862},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-naacl.16","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.16","pdf_url":"https://aclanthology.org/2025.findings-naacl.16.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-naacl.16","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.16","pdf_url":"https://aclanthology.org/2025.findings-naacl.16.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411113449.pdf","grobid_xml":"https://content.openalex.org/works/W4411113449.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"In":[0],"this":[1],"study,":[2],"we":[3,143],"introduce":[4],"RePD,":[5],"an":[6],"innovative":[7],"attack":[8],"Retrieval-based":[9],"Prompt":[10],"Decomposition":[11],"framework":[12],"designed":[13],"to":[14,38,57,86,91,101,167],"mitigate":[15],"the":[16,70,73,77,89,97,110,146,154],"risk":[17],"of":[18,52,72,129,148,156],"jailbreak":[19,39,54,74,159],"attacks":[20],"on":[21,31,42],"large":[22],"language":[23],"models":[24],"(LLMs).Despite":[25],"rigorous":[26],"pre-training":[27],"and":[28,59,93,124,140],"finetuning":[29],"focused":[30],"ethical":[32,120],"alignment,":[33],"LLMs":[34,131,157],"are":[35],"still":[36],"susceptible":[37],"exploits.RePD":[40],"operates":[41],"a":[43,50,82,114,127],"one-shot":[44,83],"learning":[45,84],"model,":[46],"wherein":[47],"it":[48],"accesses":[49],"database":[51],"pre-collected":[53],"prompt":[55,75,112],"templates":[56],"identify":[58],"decompose":[60],"harmful":[61,106,139],"inquiries":[62],"embedded":[63],"within":[64],"user":[65,169],"prompts.This":[66],"process":[67],"involves":[68],"integrating":[69],"decomposition":[71],"into":[76,81],"user's":[78,111],"original":[79],"query":[80],"example":[85],"effectively":[87],"teach":[88],"LLM":[90,98],"discern":[92],"separate":[94],"malicious":[95],"components.Consequently,":[96],"is":[99,122],"equipped":[100],"first":[102],"neutralize":[103],"any":[104],"potentially":[105],"elements":[107],"before":[108],"addressing":[109],"in":[113,152,165],"manner":[115],"that":[116],"aligns":[117],"with":[118,126,137],"its":[119],"guidelines.RePD":[121],"versatile":[123],"compatible":[125],"variety":[128],"opensource":[130],"acting":[132],"as":[133],"agents.Through":[134],"comprehensive":[135],"experimentation":[136],"both":[138],"benign":[141],"prompts,":[142],"have":[144],"demonstrated":[145],"efficacy":[147],"our":[149],"proposed":[150],"RePD":[151],"enhancing":[153],"resilience":[155],"against":[158],"attacks,":[160],"without":[161],"compromising":[162],"their":[163],"performance":[164],"responding":[166],"typical":[168],"requests.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
