{"id":"https://openalex.org/W7135220455","doi":"https://doi.org/10.48550/arxiv.2603.11214","title":"Measuring AI Agents' Progress on Multi-Step Cyber Attack Scenarios","display_name":"Measuring AI Agents' Progress on Multi-Step Cyber Attack Scenarios","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135220455","doi":"https://doi.org/10.48550/arxiv.2603.11214"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11214","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128952983","display_name":"Linus Folkerts","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Folkerts, Linus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129093717","display_name":"Will Payne","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Payne, Will","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129014539","display_name":"Simon Inman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Inman, Simon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128951451","display_name":"Philippos Giavridis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giavridis, Philippos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038680624","display_name":"Joe Skinner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Skinner, Joe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129064055","display_name":"Sam Deverett","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deverett, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128954327","display_name":"James Aung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aung, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126709349","display_name":"Ekin Zorer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zorer, Ekin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123685026","display_name":"Michael Schmatz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schmatz, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129041018","display_name":"Mahmoud Ghanem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghanem, Mahmoud","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129070436","display_name":"John Wilkinson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wilkinson, John","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053401067","display_name":"Alan Steer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Steer, Alan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129022749","display_name":"Vy Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Vy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101905086","display_name":"Jessica Wang","orcid":"https://orcid.org/0000-0002-4829-5907"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jessica","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5128952983"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.2847999930381775,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.2847999930381775,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.19439999759197235,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.14059999585151672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sophistication","display_name":"Sophistication","score":0.5993000268936157},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.567799985408783},{"id":"https://openalex.org/keywords/chaining","display_name":"Chaining","score":0.45249998569488525},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4361000061035156},{"id":"https://openalex.org/keywords/industrial-control-system","display_name":"Industrial control system","score":0.3634999990463257},{"id":"https://openalex.org/keywords/forward-chaining","display_name":"Forward chaining","score":0.34389999508857727},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.335999995470047},{"id":"https://openalex.org/keywords/attack-patterns","display_name":"Attack patterns","score":0.30709999799728394}],"concepts":[{"id":"https://openalex.org/C168725872","wikidata":"https://www.wikidata.org/wiki/Q991663","display_name":"Sophistication","level":2,"score":0.5993000268936157},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5763999819755554},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.567799985408783},{"id":"https://openalex.org/C49020025","wikidata":"https://www.wikidata.org/wiki/Q1059099","display_name":"Chaining","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4361000061035156},{"id":"https://openalex.org/C40071531","wikidata":"https://www.wikidata.org/wiki/Q2513962","display_name":"Industrial control system","level":3,"score":0.3634999990463257},{"id":"https://openalex.org/C142614401","wikidata":"https://www.wikidata.org/wiki/Q777433","display_name":"Forward chaining","level":3,"score":0.34389999508857727},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.32420000433921814},{"id":"https://openalex.org/C2780741293","wikidata":"https://www.wikidata.org/wiki/Q4818019","display_name":"Attack patterns","level":3,"score":0.30709999799728394},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.30169999599456787},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C104122410","wikidata":"https://www.wikidata.org/wiki/Q1416406","display_name":"Network model","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2777999937534332},{"id":"https://openalex.org/C201307755","wikidata":"https://www.wikidata.org/wiki/Q4071928","display_name":"Cyber-attack","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.2637999951839447},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26350000500679016}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.44697943329811096,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"evaluate":[1],"the":[2,87,102,138,148,157,162],"autonomous":[3],"cyber-attack":[4],"capabilities":[5,29],"of":[6,77,130,137,170],"frontier":[7],"AI":[8],"models":[9,37,160],"on":[10,101],"two":[11,55],"purpose-built":[12],"cyber":[13],"ranges-a":[14],"32-step":[15],"corporate":[16,103],"network":[17,104],"attack":[18],"and":[19],"a":[20,142],"7-step":[21],"industrial":[22,149],"control":[23,150],"system":[24,151],"attack-that":[25],"require":[26],"chaining":[27],"heterogeneous":[28],"across":[30],"extended":[31],"action":[32],"sequences.":[33],"By":[34],"comparing":[35],"seven":[36],"released":[38],"over":[39],"an":[40],"eighteen-month":[41],"period":[42],"(August":[43],"2024":[44],"to":[45,72,79,118,134,164],"February":[46,122],"2026)":[47],"at":[48,97,109],"varying":[49],"inference-time":[50,64],"compute":[51],"budgets,":[52],"we":[53],"observe":[54],"capability":[56],"trends.":[57],"First,":[58],"model":[59,92],"performance":[60,153],"scales":[61],"log-linearly":[62],"with":[63,66],"compute,":[65],"no":[67,82],"observed":[68],"plateau-increasing":[69],"from":[70,86,113],"10M":[71,110],"100M":[73],"tokens":[74,111],"yields":[75],"gains":[76],"up":[78],"59%,":[80],"requiring":[81],"specific":[83],"technical":[84],"sophistication":[85],"operator.":[88],"Second,":[89],"each":[90],"successive":[91],"generation":[93],"outperforms":[94],"its":[95],"predecessor":[96],"fixed":[98],"token":[99],"budgets:":[100],"range,":[105,152],"average":[106],"steps":[107],"completed":[108,128],"rose":[112],"1.7":[114],"(GPT-4o,":[115],"August":[116],"2024)":[117],"9.8":[119],"(Opus":[120],"4.6,":[121],"2026).":[123],"The":[124],"best":[125],"single":[126],"run":[127],"22":[129],"32":[131],"steps,":[132,167],"corresponding":[133],"roughly":[135],"6":[136],"estimated":[139],"14":[140],"hours":[141],"human":[143],"expert":[144],"would":[145],"need.":[146],"On":[147],"remains":[154],"limited,":[155],"though":[156],"most":[158],"recent":[159],"are":[161],"first":[163],"reliably":[165],"complete":[166],"averaging":[168],"1.2-1.4":[169],"7":[171],"(max":[172],"3).":[173]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2026-03-14T00:00:00"}
