{"id":"https://openalex.org/W7130337004","doi":"https://doi.org/10.48550/arxiv.2602.15689","title":"A Content-Based Framework for Cybersecurity Refusal Decisions in Large Language Models","display_name":"A Content-Based Framework for Cybersecurity Refusal Decisions in Large Language Models","publication_year":2026,"publication_date":"2026-02-17","ids":{"openalex":"https://openalex.org/W7130337004","doi":"https://doi.org/10.48550/arxiv.2602.15689"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.15689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.15689","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Linder, Noa","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Linder, Noa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Segal, Meirav","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Segal, Meirav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069912368","display_name":"Omer Antverg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antverg, Omer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120692056","display_name":"Gil Gekker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gekker, Gil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126340792","display_name":"Tomer Fichman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fichman, Tomer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069288115","display_name":"Omri Bodenheimer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bodenheimer, Omri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126330375","display_name":"Edan Maor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maor, Edan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126301393","display_name":"Omer Nevo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nevo, Omer","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.29249998927116394,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.29249998927116394,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.2303999960422516,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.07959999889135361,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.9478999972343445},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6733999848365784},{"id":"https://openalex.org/keywords/obfuscation","display_name":"Obfuscation","score":0.66839998960495},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5781000256538391},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.4952999949455261},{"id":"https://openalex.org/keywords/flagging","display_name":"Flagging","score":0.3476000130176544},{"id":"https://openalex.org/keywords/threat-model","display_name":"Threat model","score":0.3172999918460846},{"id":"https://openalex.org/keywords/hacker","display_name":"Hacker","score":0.30809998512268066}],"concepts":[{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.9478999972343445},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6733999848365784},{"id":"https://openalex.org/C40305131","wikidata":"https://www.wikidata.org/wiki/Q2616305","display_name":"Obfuscation","level":2,"score":0.66839998960495},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.6676999926567078},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6464999914169312},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5781000256538391},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.4952999949455261},{"id":"https://openalex.org/C2777548347","wikidata":"https://www.wikidata.org/wiki/Q5456937","display_name":"Flagging","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C86844869","wikidata":"https://www.wikidata.org/wiki/Q2798820","display_name":"Hacker","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C127627568","wikidata":"https://www.wikidata.org/wiki/Q1639361","display_name":"Sociotechnical system","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.29809999465942383},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.29319998621940613},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.28189998865127563},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C2778223634","wikidata":"https://www.wikidata.org/wiki/Q224952","display_name":"Suspect","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C67497173","wikidata":"https://www.wikidata.org/wiki/Q977345","display_name":"Legal aspects of computing","level":3,"score":0.2590000033378601},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.15689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.15689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6854368448257446}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"and":[3,24,48,69,91,117,148],"LLM-based":[4],"agents":[5],"are":[6,13],"increasingly":[7],"used":[8],"for":[9,89,120],"cybersecurity":[10],"tasks":[11],"that":[12,58,96,137],"inherently":[14],"dual-use.":[15],"Existing":[16],"approaches":[17],"to":[18,151],"refusal,":[19],"spanning":[20],"academic":[21],"policy":[22],"frameworks":[23],"commercially":[25],"deployed":[26],"systems,":[27],"often":[28],"rely":[29],"on":[30,76],"broad":[31],"topic-based":[32],"bans":[33],"or":[34,53,78],"offensive-focused":[35],"taxonomies.":[36],"As":[37],"a":[38,86],"result,":[39],"they":[40],"can":[41],"yield":[42],"inconsistent":[43],"decisions,":[44],"over-restrict":[45],"legitimate":[46],"defenders,":[47],"behave":[49],"brittlely":[50],"under":[51],"obfuscation":[52],"request":[54,130],"segmentation.":[55],"We":[56,135],"argue":[57],"effective":[59],"refusal":[60,94,155],"requires":[61],"explicitly":[62],"modeling":[63],"the":[64,125,129],"trade-off":[65],"between":[66],"offensive":[67,79],"risk":[68],"defensive":[70],"benefit,":[71],"rather":[72,131],"than":[73,132],"relying":[74],"solely":[75],"intent":[77],"classification.":[80],"In":[81],"this":[82,138],"paper,":[83],"we":[84],"introduce":[85],"content-based":[87],"framework":[88,102],"designing":[90],"auditing":[92],"cyber":[93],"policies":[95],"makes":[97],"offense-defense":[98],"tradeoffs":[99],"explicit.":[100],"The":[101],"characterizes":[103],"requests":[104],"along":[105],"five":[106],"dimensions:":[107],"Offensive":[108,111],"Action":[109],"Contribution,":[110],"Risk,":[112],"Technical":[113],"Complexity,":[114],"Defensive":[115],"Benefit,":[116],"Expected":[118],"Frequency":[119],"Legitimate":[121],"Users,":[122],"grounded":[123],"in":[124,143],"technical":[126],"substance":[127],"of":[128],"stated":[133],"intent.":[134],"demonstrate":[136],"content-grounded":[139],"approach":[140],"resolves":[141],"inconsistencies":[142],"current":[144],"frontier":[145],"model":[146],"behavior":[147],"allows":[149],"organizations":[150],"construct":[152],"tunable,":[153],"risk-aware":[154],"policies.":[156]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-19T00:00:00"}
