{"id":"https://openalex.org/W4416035833","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1128","title":"DiffusionAttacker: Diffusion-Driven Prompt Manipulation for LLM Jailbreak","display_name":"DiffusionAttacker: Diffusion-Driven Prompt Manipulation for LLM Jailbreak","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416035833","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1128"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1128","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1128","pdf_url":"https://aclanthology.org/2025.emnlp-main.1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.1128.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100630521","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0002-9435-5501"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hao Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101601229","display_name":"Hao Li","orcid":"https://orcid.org/0000-0002-3388-0740"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020030027","display_name":"Jiachen Zhu","orcid":"https://orcid.org/0000-0002-0162-5155"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junda Zhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100634222","display_name":"Xinyuan Wang","orcid":"https://orcid.org/0009-0007-5086-2310"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinyuan Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109163742","display_name":"Chengwei Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chengwei Pan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091438230","display_name":"Mingqian Huang","orcid":"https://orcid.org/0000-0002-5734-0191"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minlie Huang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079222154","display_name":"Lei Sha","orcid":"https://orcid.org/0000-0001-5914-7590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Sha","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100630521"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2456,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91408781,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"22193","last_page":"22205"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2678000032901764,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2678000032901764,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.13420000672340393,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.07599999755620956,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.2777999937534332},{"id":"https://openalex.org/keywords/troubleshooting","display_name":"Troubleshooting","score":0.24390000104904175},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.22370000183582306},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.2232999950647354},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.21860000491142273}],"concepts":[{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.42100000381469727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2985999882221222},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.24390000104904175},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.22370000183582306},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2232999950647354},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.21860000491142273},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2175000011920929},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.20579999685287476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20110000669956207}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.1128","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1128","pdf_url":"https://aclanthology.org/2025.emnlp-main.1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1128","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1128","pdf_url":"https://aclanthology.org/2025.emnlp-main.1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2218190453","display_name":null,"funder_award_id":"92367204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2802911279","display_name":null,"funder_award_id":"Young","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6058138561","display_name":null,"funder_award_id":", No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320333688","display_name":"National Outstanding Youth Science Fund Project of National Natural Science Foundation of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416035833.pdf","grobid_xml":"https://content.openalex.org/works/W4416035833.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"susceptible":[5],"to":[6,31,104,150],"generating":[7],"harmful":[8,143],"content":[9,136],"when":[10],"prompted":[11],"with":[12,37,93],"carefully":[13],"crafted":[14],"inputs,":[15],"a":[16,74,81,94,123],"vulnerability":[17],"known":[18],"as":[19,80],"LLM":[20],"jailbreaking.As":[21],"LLMs":[22,103],"become":[23],"more":[24,128],"powerful,":[25],"studying":[26],"jailbreak":[27,40,66,106],"methods":[28,52,179],"is":[29],"critical":[30],"enhancing":[32],"security":[33],"and":[34,88,116,172,190],"aligning":[35],"models":[36],"human":[38],"values.Traditionally,":[39],"techniques":[41],"have":[42],"relied":[43],"on":[44,84,170],"suffix":[45],"addition":[46],"or":[47],"prompt":[48,87,140],"templates,":[49],"but":[50],"these":[51],"suffer":[53],"from":[54,155],"limited":[55],"attack":[56,96,185],"diversity.This":[57],"paper":[58],"introduces":[59],"DiffusionAttacker,":[60],"an":[61],"end-to-end":[62],"generative":[63],"approach":[64,132],"for":[65,165],"rewriting":[67,119],"inspired":[68],"by":[69],"diffusion":[70,78,125,157],"models.Our":[71],"method":[72],"employs":[73],"sequence-to-sequence":[75],"(seq2seq)":[76],"text":[77],"model":[79],"generator,":[82],"conditioning":[83],"the":[85,90,110,118,134,138,147,152,156,163],"original":[86,139],"guiding":[89],"denoising":[91],"process":[92,154],"novel":[95],"loss.Unlike":[97],"previous":[98,178],"approaches":[99],"that":[100,175],"use":[101],"autoregressive":[102],"generate":[105],"prompts,":[107],"which":[108],"limit":[109],"modification":[111],"of":[112,137],"already":[113],"generated":[114],"tokens":[115],"restrict":[117],"space,":[120],"DiffusionAttacker":[121,176],"utilizes":[122],"seq2seq":[124],"model,":[126],"allowing":[127],"flexible":[129],"token":[130,167],"modifications.This":[131],"preserves":[133],"semantic":[135],"while":[141],"producing":[142],"content.Additionally,":[144],"we":[145],"leverage":[146],"Gumbel-Softmax":[148],"technique":[149],"make":[151],"sampling":[153],"model's":[158],"output":[159],"distribution":[160],"differentiable,":[161],"eliminating":[162],"need":[164],"iterative":[166],"search.Extensive":[168],"experiments":[169],"Advbench":[171],"Harmbench":[173],"demonstrate":[174],"outperforms":[177],"across":[180],"various":[181],"evaluation":[182],"metrics,":[183],"including":[184],"success":[186],"rate":[187],"(ASR),":[188],"fluency,":[189],"diversity.":[191]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-11-08T00:00:00"}
