{"id":"https://openalex.org/W7154973216","doi":"https://doi.org/10.48550/arxiv.2604.15717","title":"Into the Gray Zone: Domain Contexts Can Blur LLM Safety Boundaries","display_name":"Into the Gray Zone: Domain Contexts Can Blur LLM Safety Boundaries","publication_year":2026,"publication_date":"2026-04-17","ids":{"openalex":"https://openalex.org/W7154973216","doi":"https://doi.org/10.48550/arxiv.2604.15717"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15717","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15717","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15717","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134077822","display_name":"Ki Sen Hung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hung, Ki Sen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134015372","display_name":"Xi Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134006234","display_name":"Chang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134084815","display_name":"Haoran Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134077356","display_name":"Kejiang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kejiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134061383","display_name":"Changxuan Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Changxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134006370","display_name":"Tsun On Kwok","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwok, Tsun On","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134082415","display_name":"Weiming Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134087686","display_name":"Xiaomeng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaomeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134088126","display_name":"Yangqiu Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Yangqiu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.5884000062942505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.5884000062942505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.13989999890327454,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.062300000339746475,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6556000113487244},{"id":"https://openalex.org/keywords/harm","display_name":"Harm","score":0.6019999980926514},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5227000117301941},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4163999855518341},{"id":"https://openalex.org/keywords/gray","display_name":"Gray (unit)","score":0.39089998602867126},{"id":"https://openalex.org/keywords/helpfulness","display_name":"Helpfulness","score":0.3878999948501587},{"id":"https://openalex.org/keywords/jargon","display_name":"Jargon","score":0.3725000023841858},{"id":"https://openalex.org/keywords/copycat","display_name":"Copycat","score":0.31130000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6717000007629395},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6556000113487244},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.6019999980926514},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5227000117301941},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4927999973297119},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4163999855518341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39340001344680786},{"id":"https://openalex.org/C166275286","wikidata":"https://www.wikidata.org/wiki/Q190095","display_name":"Gray (unit)","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C2781265381","wikidata":"https://www.wikidata.org/wiki/Q5710255","display_name":"Helpfulness","level":2,"score":0.3878999948501587},{"id":"https://openalex.org/C2777611551","wikidata":"https://www.wikidata.org/wiki/Q17951","display_name":"Jargon","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C130191384","wikidata":"https://www.wikidata.org/wiki/Q2996887","display_name":"Copycat","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3091000020503998},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C62989814","wikidata":"https://www.wikidata.org/wiki/Q854648","display_name":"Gossip","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C2780771206","wikidata":"https://www.wikidata.org/wiki/Q3271761","display_name":"Safeguard","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15717","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15717","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15717","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15717","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6174503564834595}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"central":[1],"goal":[2],"of":[3],"LLM":[4],"alignment":[5,143],"is":[6,28],"to":[7],"balance":[8],"helpfulness":[9],"with":[10,73],"harmlessness,":[11],"yet":[12,135],"these":[13],"objectives":[14],"conflict":[15],"when":[16],"the":[17],"same":[18],"knowledge":[19],"serves":[20],"both":[21],"legitimate":[22],"and":[23,91,110,138],"malicious":[24],"purposes.":[25],"This":[26],"tension":[27],"amplified":[29],"by":[30],"context-sensitive":[31],"alignment:":[32],"we":[33,65,125],"observe":[34],"that":[35,77,101,130],"domain-specific":[36],"contexts":[37,49,72],"(e.g.,":[38,50],"chemistry)":[39],"selectively":[40],"relax":[41],"defenses":[42],"for":[43],"domain-relevant":[44],"harmful":[45,111],"knowledge,":[46],"while":[47,149],"safety-research":[48,71],"jailbreak":[51],"studies)":[52],"trigger":[53],"broader":[54],"relaxation":[55],"spanning":[56],"all":[57],"harm":[58],"categories.":[59],"To":[60,121],"systematically":[61],"exploit":[62],"this":[63,123,140],"vulnerability,":[64,124],"propose":[66],"Jargon,":[67],"a":[68,113,127],"framework":[69],"combining":[70],"multi-turn":[74],"adversarial":[75],"interactions":[76],"achieves":[78],"attack":[79,146],"success":[80,147],"rates":[81,148],"exceeding":[82],"93%":[83],"across":[84],"seven":[85],"frontier":[86],"models,":[87],"including":[88],"GPT-5.2,":[89],"Claude-4.5,":[90],"Gemini-3,":[92],"substantially":[93],"outperforming":[94],"existing":[95],"methods.":[96],"Activation":[97],"space":[98],"analysis":[99],"reveals":[100],"Jargon":[102],"queries":[103],"occupy":[104],"an":[105],"intermediate":[106],"region":[107],"between":[108],"benign":[109],"inputs,":[112],"gray":[114],"zone":[115],"where":[116],"refusal":[117],"decisions":[118],"become":[119],"unreliable.":[120],"mitigate":[122],"design":[126],"policy-guided":[128],"safeguard":[129],"steers":[131],"models":[132],"toward":[133],"helpful":[134],"harmless":[136],"responses,":[137],"internalize":[139],"capability":[141],"through":[142],"fine-tuning,":[144],"reducing":[145],"preserving":[150],"helpfulness.":[151]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
