{"id":"https://openalex.org/W7157531754","doi":"https://doi.org/10.48550/arxiv.2604.25562","title":"SnapGuard: Lightweight Prompt Injection Detection for Screenshot-Based Web Agents","display_name":"SnapGuard: Lightweight Prompt Injection Detection for Screenshot-Based Web Agents","publication_year":2026,"publication_date":"2026-04-28","ids":{"openalex":"https://openalex.org/W7157531754","doi":"https://doi.org/10.48550/arxiv.2604.25562"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.25562","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25562","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.25562","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125413885","display_name":"Mengyao Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Mengyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134846870","display_name":"Han Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006833163","display_name":"Haokai Ma","orcid":"https://orcid.org/0000-0002-4621-5213"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Haokai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134847937","display_name":"Jiahao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiahao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134845713","display_name":"Kai Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134852753","display_name":"Quanjun Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Quanjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134834070","display_name":"Ee-Chien Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Ee-Chien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.2506999969482422,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.2506999969482422,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.20880000293254852,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.1965000033378601,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.7574999928474426},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6549999713897705},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6093000173568726},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5406000018119812},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.47620001435279846},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.38040000200271606},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.31839999556541443}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8118000030517578},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.7574999928474426},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6549999713897705},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6093000173568726},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5406000018119812},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.47620001435279846},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.453900009393692},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40400001406669617},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.38040000200271606},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3481999933719635},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C2983909278","wikidata":"https://www.wikidata.org/wiki/Q6368","display_name":"Web browser","level":3,"score":0.2985000014305115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27730000019073486},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C11392498","wikidata":"https://www.wikidata.org/wiki/Q11288","display_name":"Web server","level":3,"score":0.2711000144481659},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.2694000005722046},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.2639000117778778}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.25562","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25562","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.25562","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.25562","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4449630081653595,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Web":[0],"agents":[1],"have":[2,62],"emerged":[3],"as":[4,160],"an":[5,93,207],"effective":[6],"paradigm":[7],"for":[8,38],"automating":[9],"interactions":[10],"with":[11],"complex":[12],"web":[13,40],"environments,":[14],"yet":[15,152],"remain":[16],"vulnerable":[17],"to":[18,29,134],"prompt":[19,113,157],"injection":[20,114,158],"attacks":[21,115,198],"that":[22,127,155,176,204],"embed":[23],"malicious":[24,184],"instructions":[25],"into":[26],"webpage":[27,165],"content":[28],"induce":[30],"unintended":[31],"actions.":[32],"This":[33,105],"threat":[34],"is":[35],"further":[36],"amplified":[37],"screenshot-based":[39],"agents,":[41],"which":[42],"operate":[43],"on":[44,68,144],"rendered":[45],"visual":[46,139,173],"webpages":[47,129],"rather":[48],"than":[49],"structured":[50],"textual":[51,141,188],"representations,":[52],"making":[53],"predominant":[54],"text-centric":[55],"defenses":[56],"ineffective.":[57],"Although":[58],"multimodal":[59,161],"detection":[60,159],"methods":[61],"been":[63],"explored,":[64],"they":[65],"often":[66],"rely":[67],"large":[69],"vision-language":[70],"models":[71],"(VLMs),":[72],"incurring":[73],"significant":[74],"computational":[75],"overhead.":[76,226],"The":[77],"bottleneck":[78],"lies":[79],"in":[80,97,118],"the":[81,89],"complexity":[82],"of":[83,92,210],"modern":[84],"webpages:":[85],"VLMs":[86],"must":[87],"comprehend":[88],"global":[90],"semantics":[91],"entire":[94],"page,":[95],"resulting":[96],"substantial":[98],"inference":[99],"time":[100],"and":[101,140,186,199,221],"GPU":[102],"memory":[103,225],"usage.":[104],"raises":[106],"a":[107,119,150,172],"critical":[108],"question:":[109],"can":[110],"we":[111,125,147],"detect":[112],"from":[116,137],"screenshots":[117],"lightweight":[120,151],"manner?":[121],"In":[122],"this":[123,145],"paper,":[124],"observe":[126],"injected":[128],"exhibit":[130],"distinct":[131],"characteristics":[132],"compared":[133],"benign":[135,201],"ones":[136],"both":[138],"perspectives.":[142],"Building":[143],"insight,":[146],"propose":[148],"SnapGuard,":[149],"accurate":[153],"method":[154],"reformulates":[156],"representation":[162],"analysis":[163],"over":[164],"screenshots.":[166],"SnapGuard":[167,205],"leverages":[168],"two":[169,200],"complementary":[170],"signals:":[171],"stability":[174],"indicator":[175],"identifies":[177],"abnormally":[178],"smooth":[179],"gradient":[180],"distributions":[181],"induced":[182],"by":[183],"content,":[185],"action-oriented":[187],"signals":[189],"recovered":[190],"via":[191],"contrast-polarity":[192],"reversal.":[193],"Extensive":[194],"evaluations":[195],"across":[196],"eight":[197],"settings":[202],"demonstrate":[203],"achieves":[206],"F1":[208],"score":[209],"0.75,":[211],"outperforming":[212],"GPT-4o-prompt":[213],"while":[214],"being":[215],"8x":[216],"faster":[217],"(1.81s":[218],"vs.":[219],"14.50s)":[220],"introducing":[222],"no":[223],"additional":[224]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-30T00:00:00"}
