{"id":"https://openalex.org/W7162080190","doi":"https://doi.org/10.1145/3786335.3813160","title":"The Verifier Tax: Horizon Dependent Safety--Success Tradeoffs in Tool Using LLM Agents","display_name":"The Verifier Tax: Horizon Dependent Safety--Success Tradeoffs in Tool Using LLM Agents","publication_year":2026,"publication_date":"2026-05-22","ids":{"openalex":"https://openalex.org/W7162080190","doi":"https://doi.org/10.1145/3786335.3813160"},"language":null,"primary_location":{"id":"doi:10.1145/3786335.3813160","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786335.3813160","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Conference on AI and Agentic Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3786335.3813160","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5096940264","display_name":"Tanmay Sah","orcid":"https://orcid.org/0009-0004-8583-2208"},"institutions":[{"id":"https://openalex.org/I153151563","display_name":"Harrisburg University of Science and Technology","ror":"https://ror.org/02g0s4z48","country_code":"US","type":"education","lineage":["https://openalex.org/I153151563"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tanmay Sah","raw_affiliation_strings":["Data Science, Harrisburg University of Science and Technology, Harrisburg, Pennsylvania, USA"],"raw_orcid":"https://orcid.org/0009-0004-8583-2208","affiliations":[{"raw_affiliation_string":"Data Science, Harrisburg University of Science and Technology, Harrisburg, Pennsylvania, USA","institution_ids":["https://openalex.org/I153151563"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136742224","display_name":"Vishal Srivastava","orcid":"https://orcid.org/0000-0002-5651-8365"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vishal Srivastava","raw_affiliation_strings":["Johns Hopkins University, Baltimore, Maryland, USA"],"raw_orcid":"https://orcid.org/0000-0002-5651-8365","affiliations":[{"raw_affiliation_string":"Johns Hopkins University, Baltimore, Maryland, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130601804","display_name":"Dolly Sah","orcid":null},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dolly Sah","raw_affiliation_strings":["University of Utah, Salt Lake City, Utah, USA"],"raw_orcid":"https://orcid.org/0009-0008-9190-9143","affiliations":[{"raw_affiliation_string":"University of Utah, Salt Lake City, Utah, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130389396","display_name":"Kayden Jordan","orcid":null},"institutions":[{"id":"https://openalex.org/I153151563","display_name":"Harrisburg University of Science and Technology","ror":"https://ror.org/02g0s4z48","country_code":"US","type":"education","lineage":["https://openalex.org/I153151563"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kayden Jordan","raw_affiliation_strings":["Harrisburg University of Science and Technology, Harrisburg, Pennsylvania, USA"],"raw_orcid":"https://orcid.org/0000-0001-7894-478X","affiliations":[{"raw_affiliation_string":"Harrisburg University of Science and Technology, Harrisburg, Pennsylvania, USA","institution_ids":["https://openalex.org/I153151563"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81335169,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"785","last_page":"799"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15389999747276306,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15389999747276306,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.14810000360012054,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0835999995470047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6625000238418579},{"id":"https://openalex.org/keywords/enforcement","display_name":"Enforcement","score":0.5681999921798706},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.557699978351593},{"id":"https://openalex.org/keywords/liveness","display_name":"Liveness","score":0.54339998960495},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.5389000177383423},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.46380001306533813},{"id":"https://openalex.org/keywords/hallucinating","display_name":"Hallucinating","score":0.4537000060081482},{"id":"https://openalex.org/keywords/mediation","display_name":"Mediation","score":0.4537000060081482},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.4327999949455261},{"id":"https://openalex.org/keywords/ranging","display_name":"Ranging","score":0.38429999351501465}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6966000199317932},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6625000238418579},{"id":"https://openalex.org/C2779777834","wikidata":"https://www.wikidata.org/wiki/Q4202277","display_name":"Enforcement","level":2,"score":0.5681999921798706},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.557699978351593},{"id":"https://openalex.org/C15569618","wikidata":"https://www.wikidata.org/wiki/Q3561421","display_name":"Liveness","level":2,"score":0.54339998960495},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.5389000177383423},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.46380001306533813},{"id":"https://openalex.org/C179420905","wikidata":"https://www.wikidata.org/wiki/Q223871","display_name":"Mediation","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.413100004196167},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.38429999351501465},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.35569998621940613},{"id":"https://openalex.org/C202973057","wikidata":"https://www.wikidata.org/wiki/Q7380130","display_name":"Runtime verification","level":3,"score":0.3269999921321869},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.31940001249313354},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C156325361","wikidata":"https://www.wikidata.org/wiki/Q1152864","display_name":"Grounded theory","level":3,"score":0.3167000114917755},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3125999867916107},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C2781251061","wikidata":"https://www.wikidata.org/wiki/Q5416089","display_name":"Evasion (ethics)","level":3,"score":0.30169999599456787},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.30160000920295715},{"id":"https://openalex.org/C2780224610","wikidata":"https://www.wikidata.org/wiki/Q1530061","display_name":"Credibility","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2903999984264374},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C145804949","wikidata":"https://www.wikidata.org/wiki/Q478123","display_name":"Situation awareness","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.27709999680519104},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.27160000801086426},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.26190000772476196}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3786335.3813160","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786335.3813160","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Conference on AI and Agentic Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3786335.3813160","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786335.3813160","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Conference on AI and Agentic Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5093140006065369,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W4412877164","https://openalex.org/W4415797253","https://openalex.org/W7133214447","https://openalex.org/W7133222210"],"related_works":[],"abstract_inverted_index":{"We":[0,43,100],"study":[1],"how":[2],"runtime":[3,150],"enforcement":[4,151],"against":[5],"unsafe":[6,64,104],"actions":[7,126],"affects":[8],"end-to-end":[9],"task":[10],"performance":[11],"in":[12,97,135,142],"multi-step":[13],"tool":[14],"using":[15],"large":[16],"language":[17],"model":[18,45],"(LLM)":[19],"agents.":[20],"Using":[21],"\u03c4":[22],"-bench":[23],"across":[24],"Airline":[25],"and":[26,35,41,51,63,160,178],"Retail":[27,144],"domains,":[28],"we":[29],"compare":[30],"baseline":[31],"Tool-Calling,":[32],"planning-integrated":[33],"(Triad),":[34],"policy-mediated":[36],"(Triad-Safety)":[37],"architectures":[38],"with":[39],"GPT-OSS-20B":[40,134],"GLM-4-9B.":[42],"identify":[44],"dependent":[46],"interaction":[47],"horizons":[48],"(15\u201330":[49],"turns)":[50],"decompose":[52],"outcomes":[53],"into":[54,89],"overall":[55],"success":[56,60,65,105],"rate":[57,61,66],"(SR),":[58],"safe":[59,91,165],"(SSR),":[62],"(USR).":[67],"Our":[68],"results":[69,147],"reveal":[70],"a":[71,153],"persistent":[72],"\u201cSafety-Capability":[73],"Gap\u201d.":[74],"While":[75],"safety":[76],"mediation":[77],"can":[78],"intercept":[79],"up":[80],"to":[81,118,139],"94%":[82],"of":[83,174],"non-compliant":[84],"actions,":[85],"it":[86],"rarely":[87],"translates":[88],"strictly":[90],"goal":[92],"attainment":[93],"(SSR":[94],"<":[95],"5%":[96],"most":[98],"settings).":[99],"find":[101],"that":[102,149],"high":[103],"rates":[106,123],"are":[107,127],"primarily":[108],"driven":[109],"by":[110],"\u201cIntegrity":[111],"Leaks,\u201d":[112],"where":[113],"models":[114],"hallucinate":[115],"user":[116],"identifiers":[117],"bypass":[119],"mandatory":[120],"authentication.":[121],"Recovery":[122],"following":[124],"blocked":[125],"consistently":[128],"low,":[129],"ranging":[130],"from":[131],"21%":[132],"for":[133,171],"simpler":[136],"procedural":[137],"tasks":[138],"near":[140],"0%":[141],"complex":[143],"scenarios.":[145],"These":[146],"demonstrate":[148],"imposes":[152],"significant":[154],"\u201cverifier":[155],"tax\u201d":[156],"on":[157],"conversational":[158],"length":[159],"compute":[161],"cost":[162],"without":[163],"guaranteeing":[164],"completion,":[166],"highlighting":[167],"the":[168],"critical":[169],"need":[170],"agents":[172],"capable":[173],"grounded":[175],"identity":[176],"verification":[177],"post-intervention":[179],"reasoning.":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
