{"id":"https://openalex.org/W7161806605","doi":"https://doi.org/10.48550/arxiv.2605.20023","title":"When Skills Don't Help: A Negative Result on Procedural Knowledge for Tool-Grounded Agents in Offensive Cybersecurity","display_name":"When Skills Don't Help: A Negative Result on Procedural Knowledge for Tool-Grounded Agents in Offensive Cybersecurity","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161806605","doi":"https://doi.org/10.48550/arxiv.2605.20023"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.20023","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20023","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.20023","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033842275","display_name":"Samuel Jacob Chacko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chacko, Samuel Jacob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136604697","display_name":"James Hugglestone","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hugglestone, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092269738","display_name":"Chashi Mahiul Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Islam, Chashi Mahiul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136540179","display_name":"Xiuwen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiuwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.61080002784729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.61080002784729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.08389999717473984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.7836999893188477},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.6047999858856201},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5393000245094299},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5264999866485596},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.44609999656677246},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.41019999980926514},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.3862999975681305},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.38370001316070557}],"concepts":[{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.7836999893188477},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.6047999858856201},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5393000245094299},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5264999866485596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4632999897003174},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.44609999656677246},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.4212000072002411},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.41019999980926514},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.38370001316070557},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3521000146865845},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3310999870300293},{"id":"https://openalex.org/C111065885","wikidata":"https://www.wikidata.org/wiki/Q1189053","display_name":"Fuzz testing","level":3,"score":0.32440000772476196},{"id":"https://openalex.org/C116222747","wikidata":"https://www.wikidata.org/wiki/Q220888","display_name":"Falsifiability","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.30469998717308044},{"id":"https://openalex.org/C124469403","wikidata":"https://www.wikidata.org/wiki/Q1813993","display_name":"Procedural knowledge","level":3,"score":0.2939000129699707},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C97082442","wikidata":"https://www.wikidata.org/wiki/Q1934361","display_name":"Skills management","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.25519999861717224},{"id":"https://openalex.org/C2777042776","wikidata":"https://www.wikidata.org/wiki/Q4583103","display_name":"Preparedness","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.20023","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20023","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.20023","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.20023","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.695334792137146,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Agent":[0],"Skills,":[1],"structured":[2],"packages":[3],"of":[4,26,41,79,90,130,155,207],"procedural":[5,191],"knowledge":[6],"loaded":[7],"into":[8],"an":[9,24,80,177],"LLM":[10],"agent":[11,85],"at":[12],"inference":[13],"time,":[14],"are":[15,49,67,196],"widely":[16],"reported":[17],"to":[18,107,199,243],"improve":[19],"task":[20],"pass":[21],"rates":[22],"by":[23,123],"average":[25],"16.2~percentage":[27],"points":[28],"across":[29],"diverse":[30],"domains.":[31],"Yet":[32],"the":[33,127,136,163,170,186,190,204,240],"same":[34],"benchmarks":[35],"show":[36,100],"wide":[37],"variance,":[38],"with":[39],"16":[40],"84":[42],"tasks":[43],"suffering":[44],"negative":[45],"deltas":[46],"when":[47,65],"Skills":[48,62,125,131,195,209],"introduced.":[50],"The":[51,133],"community":[52],"has":[53],"not":[54,120],"yet":[55],"articulated":[56],"a":[57,73,108,118,202,226],"clean":[58],"mechanism":[59],"for":[60,233],"\\emph{when}":[61],"help":[63],"and":[64,96,99,112,138,237],"they":[66],"merely":[68],"redundant":[69],"overhead.":[70],"We":[71,167,224],"re-analyze":[72],"recently":[74],"published":[75],"180-run":[76],"controlled":[77],"study":[78],"MCP-grounded":[81],"autonomous":[82],"Capture-the-Flag":[83],"(CTF)":[84],"under":[86],"four":[87],"documentation":[88],"conditions":[89,103,140],"increasing":[91],"richness":[92],"(591,":[93],"12865,":[94],"17253,":[95],"36001":[97],"tokens)":[98],"that":[101,169,194],"these":[102],"correspond":[104],"almost":[105],"exactly":[106],"No-Skills,":[109],"Experiential-Skills,":[110],"Curated-Skills,":[111],"Comprehensive-Skills":[113],"ablation.":[114],"In":[115],"offensive":[116],"cybersecurity,":[117],"domain":[119],"deeply":[121],"covered":[122],"existing":[124],"benchmarks,":[126],"marginal":[128,205],"benefit":[129,206],"collapses.":[132],"spread":[134],"between":[135],"no-Skills":[137],"full-Skills":[139],"is":[141,173],"only":[142],"8.9~pp":[143],"($p":[144],"=":[145,149],"0.71$,":[146],"$\u03c7^2$;":[147],"$p":[148],"0.25$,":[150],"Cochran--Armitage":[151],"trend":[152],"test;":[153],"five":[154],"six":[156],"pairwise":[157],"Cohen's":[158],"$h$":[159],"values":[160],"fall":[161],"below":[162],"$0.2$":[164],"small-effect":[165],"threshold).":[166],"argue":[168],"missing":[171],"variable":[172],"\\emph{environment-feedback":[174],"bandwidth}.":[175],"When":[176],"agent's":[178],"tool":[179],"layer":[180],"returns":[181],"strict,":[182],"schema-validated,":[183],"low-latency":[184],"observations,":[185],"environment":[187],"itself":[188],"supplies":[189],"correction":[192],"signal":[193],"normally":[197],"needed":[198],"provide.":[200],"As":[201],"result,":[203],"curated":[208],"diminishes":[210],"substantially,":[211],"and,":[212],"in":[213],"some":[214],"cases":[215],"(e.g.,":[216],"our":[217],"timing":[218],"side-channel":[219],"setting),":[220],"actively":[221],"degrades":[222],"performance.":[223],"articulate":[225],"falsifiable":[227],"hypothesis,":[228],"sketch":[229],"its":[230],"design":[231],"implications":[232],"compound":[234],"AI":[235],"systems,":[236],"will":[238],"release":[239],"reanalysis":[241],"pipeline":[242],"support":[244],"replication.":[245]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
