{"id":"https://openalex.org/W4415035565","doi":"https://doi.org/10.48550/arxiv.2505.19915","title":"Evaluating AI cyber capabilities with crowdsourced elicitation","display_name":"Evaluating AI cyber capabilities with crowdsourced elicitation","publication_year":2025,"publication_date":"2025-05-26","ids":{"openalex":"https://openalex.org/W4415035565","doi":"https://doi.org/10.48550/arxiv.2505.19915"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19915","pdf_url":"https://arxiv.org/pdf/2505.19915","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.19915","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115015965","display_name":"Artem Petrov","orcid":"https://orcid.org/0009-0004-0921-5638"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Petrov, Artem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5113923276","display_name":"Dmitrii Volkov","orcid":"https://orcid.org/0009-0008-6560-6261"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Volkov, Dmitrii","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5115015965"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.9340000152587891,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.9340000152587891,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9190000295639038,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9175999760627747,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/expert-elicitation","display_name":"Expert elicitation","score":0.7712000012397766},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.7042999863624573},{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.5888000130653381},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5626999735832214},{"id":"https://openalex.org/keywords/situation-awareness","display_name":"Situation awareness","score":0.4900999963283539},{"id":"https://openalex.org/keywords/requirements-elicitation","display_name":"Requirements elicitation","score":0.47110000252723694},{"id":"https://openalex.org/keywords/flagging","display_name":"Flagging","score":0.4336000084877014},{"id":"https://openalex.org/keywords/situational-ethics","display_name":"Situational ethics","score":0.36660000681877136}],"concepts":[{"id":"https://openalex.org/C72161134","wikidata":"https://www.wikidata.org/wiki/Q5421219","display_name":"Expert elicitation","level":2,"score":0.7712000012397766},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.7042999863624573},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6291999816894531},{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.5888000130653381},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5626999735832214},{"id":"https://openalex.org/C145804949","wikidata":"https://www.wikidata.org/wiki/Q478123","display_name":"Situation awareness","level":2,"score":0.4900999963283539},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.47200000286102295},{"id":"https://openalex.org/C45384764","wikidata":"https://www.wikidata.org/wiki/Q838667","display_name":"Requirements elicitation","level":4,"score":0.47110000252723694},{"id":"https://openalex.org/C2777548347","wikidata":"https://www.wikidata.org/wiki/Q5456937","display_name":"Flagging","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C9114305","wikidata":"https://www.wikidata.org/wiki/Q1428317","display_name":"Situational ethics","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C39389867","wikidata":"https://www.wikidata.org/wiki/Q380767","display_name":"Corporate governance","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.362199991941452},{"id":"https://openalex.org/C46934059","wikidata":"https://www.wikidata.org/wiki/Q61515","display_name":"Outsourcing","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.3203999996185303},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.3091999888420105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30410000681877136},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2800000011920929},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C2778689934","wikidata":"https://www.wikidata.org/wiki/Q1313396","display_name":"Headline","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2505.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19915","pdf_url":"https://arxiv.org/pdf/2505.19915","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.19915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.19915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.19915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19915","pdf_url":"https://arxiv.org/pdf/2505.19915","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"AI":[1,73,82,93,144,167],"systems":[2],"become":[3],"increasingly":[4],"capable,":[5],"understanding":[6],"their":[7,25],"offensive":[8],"cyber":[9,172],"potential":[10],"is":[11,43,151],"critical":[12],"for":[13,106,136],"informed":[14],"governance":[15],"and":[16,27,47,87,103],"responsible":[17],"deployment.":[18],"However,":[19],"it's":[20],"hard":[21],"to":[22,66,125,154],"accurately":[23],"bound":[24],"capabilities,":[26],"some":[28],"prior":[29],"evaluations":[30],"dramatically":[31],"underestimated":[32],"them.":[33],"The":[34,78,92],"art":[35],"of":[36,109,142,148,179],"extracting":[37],"maximum":[38],"task-specific":[39],"performance":[40,97,115,157],"from":[41,181],"AIs":[42],"called":[44],"\"AI":[45],"elicitation\",":[46],"today's":[48],"safety":[49],"organizations":[50],"typically":[51],"conduct":[52],"it":[53],"in-house.":[54],"In":[55],"this":[56],"paper,":[57],"we":[58,164],"explore":[59],"crowdsourcing":[60],"elicitation":[61,68,119,130],"efforts":[62],"as":[63,132],"an":[64,122],"alternative":[65],"in-house":[67,126],"work.":[69],"We":[70,128],"host":[71],"open-access":[72],"tracks":[74],"at":[75,98,159],"two":[76],"Capture":[77],"Flag":[79],"(CTF)":[80],"competitions:":[81],"vs.":[83],"Humans":[84],"(400":[85],"teams)":[86],"Cyber":[88],"Apocalypse":[89],"(8000":[90],"teams).":[91],"teams":[94],"achieve":[95],"outstanding":[96],"both":[99],"events,":[100],"ranking":[101],"top-5%":[102],"top-10%":[104],"respectively":[105],"a":[107,133,182],"total":[108],"\\$7500":[110],"in":[111],"bounties.":[112],"This":[113],"impressive":[114],"suggests":[116],"that":[117,166],"open-market":[118],"may":[120],"offer":[121],"effective":[123],"complement":[124],"elicitation.":[127],"propose":[129],"bounties":[131],"practical":[134],"mechanism":[135],"maintaining":[137],"timely,":[138],"cost-effective":[139],"situational":[140],"awareness":[141],"emerging":[143],"capabilities.":[145],"Another":[146],"advantage":[147],"open":[149],"elicitations":[150],"the":[152],"option":[153],"collect":[155],"human":[156,184],"data":[158],"scale.":[160],"Applying":[161],"METR's":[162],"methodology,":[163],"found":[165],"agents":[168],"can":[169],"reliably":[170],"solve":[171],"challenges":[173],"requiring":[174],"one":[175],"hour":[176],"or":[177],"less":[178],"effort":[180],"median":[183],"CTF":[185],"participant.":[186]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
