{"id":"https://openalex.org/W7077048516","doi":"https://doi.org/10.48550/arxiv.2507.20526","title":"Security Challenges in AI Agent Deployment: Insights from a Large Scale Public Competition","display_name":"Security Challenges in AI Agent Deployment: Insights from a Large Scale Public Competition","publication_year":2025,"publication_date":"2025-07-28","ids":{"openalex":"https://openalex.org/W7077048516","doi":"https://doi.org/10.48550/arxiv.2507.20526"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2507.20526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2507.20526","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zou, Andy","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zou, Andy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lin, Maxwell","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Maxwell","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jones, Eliot","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jones, Eliot","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Nowak, Micha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nowak, Micha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dziemian, Mateusz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dziemian, Mateusz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Winter, Nick","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winter, Nick","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Grattan, Alexander","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grattan, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Nathanael, Valent","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nathanael, Valent","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Croft, Ayla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Croft, Ayla","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Davies, Xander","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Davies, Xander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Patel, Jai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patel, Jai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kirk, Robert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kirk, Robert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Burnikell, Nate","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Burnikell, Nate","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Gal, Yarin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gal, Yarin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hendrycks, Dan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hendrycks, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kolter, J. Zico","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kolter, J. Zico","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Fredrikson, Matt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fredrikson, Matt","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6593000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6593000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.03060000017285347,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.019200000911951065,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6549999713897705},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5758000016212463},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5522000193595886},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46549999713897705},{"id":"https://openalex.org/keywords/competition","display_name":"Competition (biology)","score":0.4277999997138977},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3955000042915344},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.3926999866962433},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.36719998717308044},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.3580000102519989},{"id":"https://openalex.org/keywords/security-policy","display_name":"Security policy","score":0.33730000257492065}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6549999713897705},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6304000020027161},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5758000016212463},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.5730999708175659},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5522000193595886},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46549999713897705},{"id":"https://openalex.org/C91306197","wikidata":"https://www.wikidata.org/wiki/Q45767","display_name":"Competition (biology)","level":2,"score":0.4277999997138977},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3955000042915344},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.3580000102519989},{"id":"https://openalex.org/C154908896","wikidata":"https://www.wikidata.org/wiki/Q2167404","display_name":"Security policy","level":2,"score":0.33730000257492065},{"id":"https://openalex.org/C2778403875","wikidata":"https://www.wikidata.org/wiki/Q20312394","display_name":"Adversarial machine learning","level":3,"score":0.33309999108314514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.322299987077713},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C167981075","wikidata":"https://www.wikidata.org/wiki/Q2667186","display_name":"Sandbox (software development)","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C2780873155","wikidata":"https://www.wikidata.org/wiki/Q392811","display_name":"Agent-based model","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C60643870","wikidata":"https://www.wikidata.org/wiki/Q1949683","display_name":"Deterrence theory","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C46355384","wikidata":"https://www.wikidata.org/wiki/Q726686","display_name":"Compromise","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2702000141143799},{"id":"https://openalex.org/C2781198186","wikidata":"https://www.wikidata.org/wiki/Q701521","display_name":"Collusion","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C29122968","wikidata":"https://www.wikidata.org/wiki/Q1414816","display_name":"Incentive","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C5894958","wikidata":"https://www.wikidata.org/wiki/Q2297769","display_name":"Software agent","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C109986646","wikidata":"https://www.wikidata.org/wiki/Q546113","display_name":"Public policy","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2635999917984009},{"id":"https://openalex.org/C74072328","wikidata":"https://www.wikidata.org/wiki/Q1142726","display_name":"Intelligent agent","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.2526000142097473},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2507.20526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2507.20526","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.20526","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7002605199813843,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"have":[2],"enabled":[3],"LLM-powered":[4],"AI":[5,53,164],"agents":[6,54,113],"to":[7,29,48,88,177],"autonomously":[8],"execute":[9],"complex":[10],"tasks":[11],"by":[12],"combining":[13],"language":[14],"model":[15,140],"reasoning":[16],"with":[17,66,123],"tools,":[18],"memory,":[19],"and":[20,81,104,129,139,159,171,183],"web":[21],"access.":[22],"But":[23],"can":[24],"these":[25,86],"systems":[26],"be":[27],"trusted":[28],"follow":[30],"deployment":[31,58],"policies":[32],"in":[33,162],"realistic":[34,57],"environments,":[35],"especially":[36],"under":[37],"attack?":[38],"To":[39],"investigate,":[40],"we":[41,132,175],"ran":[42],"the":[43,90,168],"largest":[44],"public":[45],"red-teaming":[46],"competition":[47],"date,":[49],"targeting":[50],"22":[51],"frontier":[52],"across":[55,107,127],"44":[56],"scenarios.":[59],"Participants":[60],"submitted":[61],"1.8":[62],"million":[63],"prompt-injection":[64],"attacks,":[65],"over":[67],"60,000":[68],"successfully":[69],"eliciting":[70],"policy":[71,115],"violations":[72,116],"such":[73],"as":[74],"unauthorized":[75],"data":[76],"access,":[77],"illicit":[78],"financial":[79],"actions,":[80],"regulatory":[82],"noncompliance.":[83],"We":[84],"use":[85],"results":[87],"build":[89],"Agent":[91],"Red":[92],"Teaming":[93],"(ART)":[94],"benchmark":[95,170],"-":[96,103],"a":[97],"curated":[98],"set":[99],"of":[100],"high-impact":[101],"attacks":[102],"evaluate":[105],"it":[106],"19":[108],"state-of-the-art":[109],"models.":[110],"Nearly":[111],"all":[112],"exhibit":[114],"for":[117],"most":[118],"behaviors":[119],"within":[120],"10-100":[121],"queries,":[122],"high":[124],"attack":[125],"transferability":[126],"models":[128],"tasks.":[130],"Importantly,":[131],"find":[133],"limited":[134],"correlation":[135],"between":[136],"agent":[137,188],"robustness":[138],"size,":[141],"capability,":[142],"or":[143],"inference-time":[144],"compute,":[145],"suggesting":[146],"that":[147],"additional":[148],"defenses":[149],"are":[150],"needed":[151],"against":[152],"adversarial":[153],"misuse.":[154],"Our":[155],"findings":[156],"highlight":[157],"critical":[158],"persistent":[160],"vulnerabilities":[161],"today's":[163],"agents.":[165],"By":[166],"releasing":[167],"ART":[169],"accompanying":[172],"evaluation":[173],"framework,":[174],"aim":[176],"support":[178],"more":[179],"rigorous":[180],"security":[181],"assessment":[182],"drive":[184],"progress":[185],"toward":[186],"safer":[187],"deployment.":[189]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
