{"id":"https://openalex.org/W7160149524","doi":"https://doi.org/10.1016/j.neunet.2026.109065","title":"SecReEvalBench: A real-world scenario-based security resilience benchmark for large language models","display_name":"SecReEvalBench: A real-world scenario-based security resilience benchmark for large language models","publication_year":2026,"publication_date":"2026-05-01","ids":{"openalex":"https://openalex.org/W7160149524","doi":"https://doi.org/10.1016/j.neunet.2026.109065","pmid":"https://pubmed.ncbi.nlm.nih.gov/42096884"},"language":"en","primary_location":{"id":"doi:10.1016/j.neunet.2026.109065","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109065","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.neunet.2026.109065","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135182402","display_name":"Huining Cui","orcid":"https://orcid.org/0009-0004-7408-8719"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Huining Cui","raw_affiliation_strings":["School of Computer Science, University of Technology Sydney, 15 Broadway, Sydney, 2008, NSW, Australia. Electronic address: Huining.Cui-1@student.uts.edu.au"],"raw_orcid":"https://orcid.org/0009-0004-7408-8719","affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Technology Sydney, 15 Broadway, Sydney, 2008, NSW, Australia. Electronic address: Huining.Cui-1@student.uts.edu.au","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135165698","display_name":"Wei Liu","orcid":"https://orcid.org/0009-0008-6862-3593"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wei Liu","raw_affiliation_strings":["School of Computer Science, University of Technology Sydney, 15 Broadway, Sydney, 2008, NSW, Australia. Electronic address: wei.liu@uts.edu.au"],"raw_orcid":"https://orcid.org/0000-0002-3003-1313","affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Technology Sydney, 15 Broadway, Sydney, 2008, NSW, Australia. Electronic address: wei.liu@uts.edu.au","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5135182402"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":{"value":3350,"currency":"USD","value_usd":3350},"apc_paid":{"value":3350,"currency":"USD","value_usd":3350},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.92708188,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"202","issue":null,"first_page":"109065","last_page":"109065"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.13220000267028809,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.13220000267028809,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0828000009059906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.07360000163316727,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.609000027179718},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5698999762535095},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.37139999866485596},{"id":"https://openalex.org/keywords/computer-security-model","display_name":"Computer security model","score":0.28519999980926514},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.2711000144481659}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7231000065803528},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.609000027179718},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5698999762535095},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5034000277519226},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.367900013923645},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.296099990606308},{"id":"https://openalex.org/C121822524","wikidata":"https://www.wikidata.org/wiki/Q5157582","display_name":"Computer security model","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26440000534057617},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.251800000667572},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1016/j.neunet.2026.109065","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109065","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},{"id":"pmid:42096884","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/42096884","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural networks : the official journal of the International Neural Network Society","raw_type":null}],"best_oa_location":{"id":"doi:10.1016/j.neunet.2026.109065","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109065","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6243083477020264,"id":"https://metadata.un.org/sdg/13","display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W3090559339","https://openalex.org/W4285210452","https://openalex.org/W4389518968","https://openalex.org/W4390750480","https://openalex.org/W4402670748","https://openalex.org/W4412945444","https://openalex.org/W4415795551","https://openalex.org/W7155092381"],"related_works":[],"abstract_inverted_index":{"We":[0,32],"study":[1],"the":[2],"security":[3,80],"resilience":[4],"of":[5],"large":[6],"language":[7],"models":[8,127,140],"to":[9,29,54,108],"prompt-chain":[10],"attacks":[11],"that":[12,145,150],"exploit":[13],"context":[14],"retention":[15],"and":[16,25,50,69,85,119,137,142,161,175,183],"sequencing.":[17],"Existing":[18],"evaluations":[19],"emphasize":[20],"single-turn":[21,151],"or":[22,156],"fixed-domain":[23],"settings":[24],"thus":[26],"under-measure":[27],"vulnerabilities":[28,149],"sequence-aware":[30,146,184],"adversaries.":[31],"introduce":[33],"SecReEvalBench,":[34],"a":[35,105,177],"scenario-based":[36],"benchmark":[37],"with":[38,89,104],"six":[39],"attack":[40,83],"sequences":[41],"(one-off,":[42],"successive,":[43],"successive-reverse,":[44],"alternative,":[45],"sequential":[46,48],"ascending,":[47],"descending)":[49],"four":[51,86],"metrics":[52,147],"tailored":[53],"multi-turn":[55,172],"security:":[56],"Prompt":[57,61],"Attack":[58,62,66,71],"Resilience":[59,67],"Score,":[60,64,68],"Refusal-Logic":[63],"Chain":[65,70],"Refusal":[72],"Timing":[73],"Score.":[74],"The":[75],"accompanying":[76],"dataset":[77,187],"spans":[78],"seven":[79],"domains,":[81],"sixteen":[82],"techniques,":[84],"maliciousness":[87],"levels":[88],"dual-LLM":[90],"adjudication":[91],"for":[92,180],"intent":[93],"labels.":[94],"Our":[95,186],"evaluation":[96],"protocol":[97],"combines":[98],"an":[99],"unsafe-content":[100],"detector":[101],"(Llama-Guard":[102],"3)":[103,136],"refusal":[106,114],"classifier":[107],"disentangle":[109],"eventual":[110],"unsafe":[111],"cooperation":[112],"from":[113],"timing,":[115],"yielding":[116],"bounded,":[117],"auditable,":[118],"sequence-sensitive":[120],"measurements.":[121],"Experiments":[122],"on":[123],"five":[124],"representative":[125],"open-weight":[126],"(Llama":[128],"3.1,":[129],"Gemma":[130],"2,":[131],"Mistral":[132],"v0.3,":[133],"DeepSeek-R1,":[134],"Qwen":[135],"two":[138],"proprietary":[139],"(gpt-3.5-turbo":[141],"gpt-4o-mini)":[143],"show":[144],"surface":[148],"tests":[152],"miss,":[153],"including":[154],"delayed":[155],"absent":[157],"refusals":[158],"under":[159],"escalation":[160],"leakage":[162],"through":[163],"visible":[164],"intermediate":[165],"reasoning":[166],"despite":[167],"surface-level":[168],"refusals.":[169],"SecReEvalBench":[170],"standardizes":[171],"safety":[173],"assessment":[174],"provides":[176],"reproducible":[178],"basis":[179],"developing":[181],"domain-":[182],"defenses.":[185],"is":[188],"made":[189],"publicly":[190],"available":[191],"at":[192],"https://kaggle.com/datasets/5a7ee22cf9dab6c93b55a73f630f6c9b42e936351b0ae98fbae6ddaca7fe248d.":[193]},"counts_by_year":[],"updated_date":"2026-05-09T06:09:20.037420","created_date":"2026-05-05T00:00:00"}
