{"id":"https://openalex.org/W7162091758","doi":"https://doi.org/10.48550/arxiv.2605.22321","title":"Benchmarking Autonomous Agents against Temporal, Spatial, and Semantic Evasions","display_name":"Benchmarking Autonomous Agents against Temporal, Spatial, and Semantic Evasions","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162091758","doi":"https://doi.org/10.48550/arxiv.2605.22321"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22321","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101852566","display_name":"Jianan Ma","orcid":"https://orcid.org/0009-0007-0448-1218"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jianan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067405541","display_name":"Xiaohu Du","orcid":"https://orcid.org/0000-0003-4455-3128"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xiaohu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136208728","display_name":"Ruixiao Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Ruixiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136767142","display_name":"Yaoxiang Bian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bian, Yaoxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101864441","display_name":"Jialuo Chen","orcid":"https://orcid.org/0000-0003-4322-4285"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jialuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136802664","display_name":"Jingyi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jingyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136808519","display_name":"Xiaofang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiaofang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136814384","display_name":"Shiwen Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Shiwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136765825","display_name":"Changhua Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Changhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136806182","display_name":"Xinhao Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Xinhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136781476","display_name":"Zhen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.25769999623298645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.25769999623298645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2549000084400177,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.17630000412464142,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/evasion","display_name":"Evasion (ethics)","score":0.7405999898910522},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6717000007629395},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5845000147819519},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5090000033378601},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.4918999969959259},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48590001463890076},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.460099995136261},{"id":"https://openalex.org/keywords/stateless-protocol","display_name":"Stateless protocol","score":0.45170000195503235},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.3675999939441681}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7646999955177307},{"id":"https://openalex.org/C2781251061","wikidata":"https://www.wikidata.org/wiki/Q5416089","display_name":"Evasion (ethics)","level":3,"score":0.7405999898910522},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6717000007629395},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5845000147819519},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.5728999972343445},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5090000033378601},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.4918999969959259},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48590001463890076},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.460099995136261},{"id":"https://openalex.org/C103613024","wikidata":"https://www.wikidata.org/wiki/Q230924","display_name":"Stateless protocol","level":3,"score":0.45170000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39989998936653137},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.3625999987125397},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3409999907016754},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.30309998989105225},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.28119999170303345},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7858046889305115,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"autonomous":[1,163],"agents":[2],"(e.g.,":[3],"OpenClaw)":[4],"increasingly":[5],"operate":[6],"with":[7,127],"deep":[8],"system-level":[9],"privileges":[10],"to":[11,153,170,180],"execute":[12],"complex":[13,82],"tasks,":[14],"they":[15],"introduce":[16,59],"severe,":[17],"unmitigated":[18],"security":[19],"risks.":[20],"Current":[21],"vulnerability":[22],"analyses":[23],"overwhelmingly":[24],"focus":[25],"on":[26],"single-turn,":[27],"stateless":[28],"behaviors,":[29],"overlooking":[30],"the":[31,144,181],"expanded":[32],"attack":[33,62],"surface":[34],"inherent":[35],"in":[36,161],"stateful,":[37],"multi-turn":[38],"interactions":[39],"and":[40,91],"dynamic":[41],"tool":[42],"invocations.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47,108,137],"propose":[48],"a":[49,111,121,150],"novel,":[50],"multi-dimensional":[51],"evasion":[52,141],"framework":[53,124,142],"targeting":[54],"LLM-based":[55],"agent":[56,117,123,164],"systems.":[57],"We":[58],"three":[60],"stealthy":[61],"vectors:":[63],"(1)":[64],"Temporal":[65],"evasion,":[66,77,94],"which":[67,78,95],"fragments":[68],"malicious":[69,97],"payloads":[70,80],"across":[71],"sequential":[72],"interaction":[73],"turns;":[74],"(2)":[75],"Spatial":[76],"conceals":[79],"within":[81],"external":[83],"artifacts":[84],"that":[85,139,166],"evade":[86],"standard":[87,122],"LLM":[88,130],"parsing":[89],"mechanisms;":[90],"(3)":[92],"Semantic":[93],"obscures":[96],"intents":[98],"beneath":[99],"benign":[100],"contextual":[101],"noise.":[102],"To":[103],"systematically":[104],"quantify":[105],"these":[106],"threats,":[107],"construct":[109],"A3S-Bench,":[110],"comprehensive":[112],"benchmark":[113],"comprising":[114],"2,254":[115],"real-world":[116],"execution":[118],"trajectories.":[119],"Evaluating":[120],"separately":[125],"integrated":[126],"10":[128],"mainstream":[129],"backbones":[131],"against":[132],"20":[133],"practical":[134],"threat":[135],"scenarios,":[136],"demonstrate":[138],"our":[140],"elevates":[143],"average":[145],"risk":[146],"trigger":[147],"rate":[148],"from":[149],"28.3\\%":[151],"baseline":[152],"52.6\\%.":[154],"These":[155],"findings":[156],"reveal":[157],"systemic,":[158],"architecture-level":[159],"vulnerabilities":[160],"current":[162],"systems":[165],"existing":[167],"defenses":[168],"fail":[169],"address,":[171],"highlighting":[172],"an":[173],"urgent":[174],"need":[175],"for":[176],"defense":[177],"mechanisms":[178],"tailored":[179],"unique":[182],"threats.":[183]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-23T00:00:00"}
