{"id":"https://openalex.org/W7117587198","doi":"https://doi.org/10.1145/3733799.3762980","title":"How Not to Detect Prompt Injections with an LLM","display_name":"How Not to Detect Prompt Injections with an LLM","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W7117587198","doi":"https://doi.org/10.1145/3733799.3762980"},"language":null,"primary_location":{"id":"doi:10.1145/3733799.3762980","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3733799.3762980","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM Workshop on Artificial Intelligence and Security","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3733799.3762980","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sarthak Choudhary","orcid":"https://orcid.org/0000-0003-1881-3408"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sarthak Choudhary","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA"],"raw_orcid":"https://orcid.org/0000-0003-1881-3408","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109022118","display_name":"Divyam Anshumaan","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divyam Anshumaan","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA"],"raw_orcid":"https://orcid.org/0009-0008-6324-0315","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053586439","display_name":"Nils Palumbo","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nils Palumbo","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA"],"raw_orcid":"https://orcid.org/0009-0000-7451-0976","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103835847","display_name":"Somesh Jha","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Somesh Jha","raw_affiliation_strings":["Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA"],"raw_orcid":"https://orcid.org/0000-0001-5877-0436","affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, USA","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":6.5198,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.96827799,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"218","last_page":"229"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.5885999798774719,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.5885999798774719,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.11429999768733978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10951","display_name":"Cryptographic Implementations and Security","score":0.06849999725818634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7940999865531921},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.6550999879837036},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.5867000222206116},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3199000060558319}],"concepts":[{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7940999865531921},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.6550999879837036},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6496999859809875},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.5867000222206116},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45159998536109924},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.44859999418258667},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3199000060558319},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29660001397132874},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C167063184","wikidata":"https://www.wikidata.org/wiki/Q1400839","display_name":"Vulnerability assessment","level":3,"score":0.2766000032424927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3733799.3762980","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3733799.3762980","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM Workshop on Artificial Intelligence and Security","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3733799.3762980","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3733799.3762980","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM Workshop on Artificial Intelligence and Security","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1873763122","https://openalex.org/W2090903439","https://openalex.org/W2119047901","https://openalex.org/W2124344619","https://openalex.org/W2135930857","https://openalex.org/W2139172211","https://openalex.org/W2154909745","https://openalex.org/W4388886073","https://openalex.org/W4389520756","https://openalex.org/W4402217583","https://openalex.org/W4404612289","https://openalex.org/W4405181744","https://openalex.org/W4411337880"],"related_works":[],"abstract_inverted_index":{"LLM-integrated":[0],"applications":[1],"and":[2,71],"agents":[3],"are":[4],"vulnerable":[5],"to":[6,20,43,53,58,121],"prompt":[7,59],"injection":[8,60],"attacks,":[9],"where":[10],"adversaries":[11],"embed":[12],"malicious":[13,109],"instructions":[14],"within":[15],"seemingly":[16],"benign":[17],"input":[18,45],"data":[19,46],"manipulate":[21],"the":[22,55,68,122],"LLM\u2019s":[23,41],"intended":[24],"behavior.":[25],"Recent":[26],"defenses":[27],"based":[28],"on":[29],"known-answer":[30],"detection":[31,100],"(KAD)":[32],"scheme":[33,70],"have":[34],"reported":[35],"near-perfect":[36],"performance":[37],"by":[38],"observing":[39],"an":[40,90],"output":[42],"classify":[44],"as":[47,61,102,104],"clean":[48],"or":[49,124],"contaminated.":[50],"KAD":[51,69,97],"attempts":[52],"repurpose":[54],"very":[56],"susceptibility":[57],"a":[62,73,112],"defensive":[63],"mechanism.":[64],"We":[65,128],"formally":[66],"characterize":[67],"uncover":[72],"structural":[74],"vulnerability":[75],"that":[76],"invalidates":[77],"its":[78],"core":[79],"security":[80],"premise.":[81],"To":[82],"exploit":[83],"this":[84],"fundamental":[85],"vulnerability,":[86],"we":[87],"methodically":[88],"design":[89],"adaptive":[91],"attack,":[92],"DataFlip.":[93],"It":[94],"consistently":[95],"evades":[96],"defenses,":[98],"achieving":[99],"rates":[101],"low":[103],"\\(0\\%\\)":[105],"while":[106],"reliably":[107],"inducing":[108],"behavior":[110],"with":[111],"success":[113],"rate":[114],"of":[115],"\\(91\\%\\)\u2014all":[116],"without":[117],"requiring":[118],"white-box":[119],"access":[120],"LLM":[123],"any":[125],"optimization":[126],"procedures.":[127],"release":[129],"our":[130],"evaluation":[131],"code":[132],"at":[133],"[10].":[134]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2025-12-31T23:11:33.660297","created_date":"2025-12-30T00:00:00"}
