{"id":"https://openalex.org/W7161716710","doi":"https://doi.org/10.48550/arxiv.2605.17453","title":"Trust No Tool: Evaluating and Defending LLM Agents under Untrusted Tool Feedback","display_name":"Trust No Tool: Evaluating and Defending LLM Agents under Untrusted Tool Feedback","publication_year":2026,"publication_date":"2026-05-17","ids":{"openalex":"https://openalex.org/W7161716710","doi":"https://doi.org/10.48550/arxiv.2605.17453"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17453","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17453","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17453","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038559282","display_name":"\u95eb\u4e50\u6210","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Lecheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136474868","display_name":"Ruizhe Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ruizhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136498130","display_name":"Xicheng Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Xicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130183446","display_name":"Wenxi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136462978","display_name":"Binwu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Binwu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136501567","display_name":"Longyue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Longyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136471724","display_name":"Chenyang Lyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Chenyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136469078","display_name":"Guanhua Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Guanhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.4327000081539154,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.4327000081539154,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.1128000020980835,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10927","display_name":"Access Control and Trust","score":0.09910000115633011,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.8170999884605408},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6740999817848206},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6184999942779541},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5788000226020813},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5503000020980835},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.5174999833106995}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.8170999884605408},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7189000248908997},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6740999817848206},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6184999942779541},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5788000226020813},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5503000020980835},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.41100001335144043},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.39719998836517334},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35899999737739563},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3068999946117401},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2775000035762787},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2696000039577484}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17453","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17453","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17453","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17453","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4502474069595337,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"},{"score":0.4450586438179016,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tool-using":[0],"LLM":[1],"agents":[2],"increasingly":[3],"rely":[4],"on":[5,177],"external":[6],"tools":[7],"to":[8,93,113,193],"make":[9],"consequential":[10],"decisions,":[11],"yet":[12],"most":[13],"existing":[14],"agent-security":[15],"benchmarks":[16],"and":[17,54,99,126,147,162,175,231],"defenses":[18],"implicitly":[19],"assume":[20],"that":[21,122,142,183],"tool":[22,28,44,116,206,218],"feedback":[23],"is":[24,112,212,225],"trustworthy":[25],"once":[26],"a":[27,34,42,75,102,198],"has":[29],"been":[30],"selected.":[31],"We":[32],"study":[33,69],"different":[35],"failure":[36],"mode,":[37],"cognitive":[38],"poisoning,":[39],"in":[40,151,204],"which":[41],"malicious":[43],"behaves":[45],"plausibly":[46],"during":[47],"exploration,":[48],"accumulates":[49],"trust":[50,224],"through":[51,233],"benign-looking":[52],"feedback,":[53],"becomes":[55],"harmful":[56],"only":[57,185],"when":[58],"hidden":[59],"state":[60],"conditions":[61],"align":[62],"with":[63,83],"the":[64,129,132,189,208,222,228,234],"final":[65,133,235],"executable":[66,134],"action.":[67,236],"To":[68],"this":[70,137,152],"setting,":[71],"we":[72],"construct":[73],"TRUST-Bench,":[74],"task-conditioned":[76],"benchmark":[77],"of":[78,131,188,201],"1,970":[79],"hidden-trigger":[80],"tool-compromise":[81],"episodes":[82],"matched":[84],"safe":[85],"controls,":[86],"introduce":[87],"an":[88],"asymmetric":[89],"penalty":[90],"metric,":[91],"GuardedJoint,":[92,170],"better":[94],"reflect":[95],"real":[96],"deployment":[97],"risk,":[98],"present":[100],"VISTA-Guard,":[101],"backbone-agnostic":[103],"framework":[104],"for":[105],"final-action":[106,156],"risk":[107,130],"scoring.":[108],"The":[109],"core":[110],"idea":[111],"abstract":[114],"multi-step":[115],"interaction":[117,229],"into":[118],"structured":[119],"environment":[120],"variables":[121],"encode":[123],"trust-formation":[124],"dynamics":[125],"then":[127],"score":[128],"action":[135],"from":[136],"trajectory-conditioned":[138],"representation.":[139],"Experiments":[140],"show":[141],"prompt-centric":[143],"heuristics,":[144],"scalarized":[145],"features,":[146],"zero-shot":[148],"judges":[149],"fail":[150],"regime,":[153],"whereas":[154],"trajectory-aware":[155],"scoring":[157],"yields":[158],"strong":[159],"in-domain":[160,174],"discrimination":[161],"remains":[163],"effective":[164],"under":[165],"balanced":[166,178],"out-of-distribution":[167,179],"transfer.":[168],"Under":[169],"VISTA-Guard":[171],"reaches":[172],"$84.2$":[173],"$56.9$":[176],"evaluation,":[180],"while":[181],"methods":[182],"optimize":[184],"one":[186],"side":[187],"safety--utility":[190],"tradeoff":[191],"collapse":[192],"zero.":[194],"These":[195],"findings":[196],"support":[197],"broader":[199],"view":[200],"agent":[202],"security":[203],"black-box":[205],"ecosystems:":[207],"decisive":[209],"defense":[210],"target":[211],"not":[213],"local":[214],"prompt":[215],"text":[216],"or":[217],"descriptors":[219],"alone,":[220],"but":[221],"way":[223],"formed":[226],"across":[227],"trajectory":[230],"committed":[232]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
