{"id":"https://openalex.org/W7154071226","doi":"https://doi.org/10.48550/arxiv.2604.08557","title":"Re-Mask and Redirect: Exploiting Denoising Irreversibility in Diffusion Language Models","display_name":"Re-Mask and Redirect: Exploiting Denoising Irreversibility in Diffusion Language Models","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7154071226","doi":"https://doi.org/10.48550/arxiv.2604.08557"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08557","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08557","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08557","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018244556","display_name":"Arth Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Singh, Arth","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5018244556"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.795799970626831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.795799970626831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.052799999713897705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.03350000083446503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5479999780654907},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5382000207901001},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4936000108718872},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.4438999891281128},{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.41110000014305115},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4074000120162964},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.3926999866962433},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.38830000162124634},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.33980000019073486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6611999869346619},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5479999780654907},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5382000207901001},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4936000108718872},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45089998841285706},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.4438999891281128},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.41110000014305115},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3955000042915344},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.38830000162124634},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3379000127315521},{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33250001072883606},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2780909371","wikidata":"https://www.wikidata.org/wiki/Q4801092","display_name":"Artificial noise","level":4,"score":0.27140000462532043},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.26339998841285706},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C99821215","wikidata":"https://www.wikidata.org/wiki/Q1136583","display_name":"Swap (finance)","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C3265923","wikidata":"https://www.wikidata.org/wiki/Q669129","display_name":"Arithmetic underflow","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08557","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08557","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08557","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08557","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7382488250732422,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Safety":[0],"alignment":[1],"in":[2],"diffusion":[3],"language":[4],"models":[5],"(dLLMs)":[6],"relies":[7],"on":[8,38,67],"a":[9,31,51,103],"single":[10],"load-bearing":[11],"assumption:":[12],"that":[13,20,145],"committed":[14,26],"tokens":[15,28],"are":[16],"permanent.":[17],"We":[18,56],"show":[19],"violating":[21],"this":[22,58],"assumption,":[23],"by":[24],"re-masking":[25,90],"refusal":[27],"and":[29,73,77,93],"injecting":[30],"short":[32],"affirmative":[33],"prefix,":[34],"achieves":[35],"74-82%":[36],"ASR":[37,109],"HarmBench":[39],"across":[40,75],"all":[41],"three":[42],"publicly":[43],"available":[44],"safety-tuned":[45],"dLLMs,":[46,68],"rising":[47],"to":[48,130],"92-98%":[49],"with":[50],"generic":[52],"8-token":[53],"compliance":[54],"prefix.":[55],"call":[57,153],"attack":[59,66],"TrajHijack;":[60],"it":[61],"is":[62,87,127],"the":[63,85,134,142,154],"first":[64],"trajectory-level":[65,146],"requires":[69],"no":[70],"gradient":[71,100],"computation,":[72],"generalizes":[74],"SFT":[76],"preference-optimized":[78],"(VRPO)":[79],"models.":[80],"Three":[81],"findings":[82],"emerge.":[83],"First,":[84],"vulnerability":[86],"irreducibly":[88],"two-component:":[89],"alone":[91,95],"(4.4%)":[92],"prefix":[94],"(5.7%)":[96],"both":[97],"fail.":[98],"Second,":[99],"optimization":[101],"via":[102],"differentiable":[104],"Gumbel-softmax":[105],"chain":[106],"consistently":[107],"degrades":[108],"(41.5%":[110],"vs.":[111],"76.1%),":[112],"because":[113],"continuous":[114],"perturbations":[115],"push":[116],"token":[117],"distributions":[118],"off-manifold.":[119],"Third,":[120],"A2D":[121],"(the":[122],"strongest":[123],"published":[124],"dLLM":[125],"defense)":[126],"more":[128],"vulnerable":[129],"TrajHijack":[131],"(89.9%)":[132],"than":[133],"undefended":[135],"model":[136],"(76.1%):":[137],"its":[138],"silent-refusal":[139],"training":[140],"removes":[141],"contextual":[143],"resistance":[144],"attacks":[147],"must":[148],"overcome,":[149],"an":[150],"effect":[151],"we":[152],"Defense":[155],"Inversion":[156],"Effect.":[157]},"counts_by_year":[],"updated_date":"2026-04-15T05:59:14.812645","created_date":"2026-04-14T00:00:00"}
