{"id":"https://openalex.org/W7140168421","doi":"https://doi.org/10.48550/arxiv.2603.20449","title":"Solver-Aided Verification of Policy Compliance in Tool-Augmented LLM Agents","display_name":"Solver-Aided Verification of Policy Compliance in Tool-Augmented LLM Agents","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7140168421","doi":"https://doi.org/10.48550/arxiv.2603.20449"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20449","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20449","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20449","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Winston, Cailin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winston, Cailin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Winston, Claris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winston, Claris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Just, Ren\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Just, Ren\u00e9","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.16329999268054962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.16329999268054962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12240000069141388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.09049999713897705,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5623999834060669},{"id":"https://openalex.org/keywords/compliance","display_name":"Compliance (psychology)","score":0.5616999864578247},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5284000039100647},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.38040000200271606},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.37770000100135803},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.3521000146865845},{"id":"https://openalex.org/keywords/temporal-logic","display_name":"Temporal logic","score":0.3301999866962433}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6197999715805054},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5623999834060669},{"id":"https://openalex.org/C2781460075","wikidata":"https://www.wikidata.org/wiki/Q1399332","display_name":"Compliance (psychology)","level":2,"score":0.5616999864578247},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5284000039100647},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.5249000191688538},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.491100013256073},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4293000102043152},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.38040000200271606},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C25016198","wikidata":"https://www.wikidata.org/wiki/Q781833","display_name":"Temporal logic","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C123587114","wikidata":"https://www.wikidata.org/wiki/Q2101508","display_name":"Policy analysis","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.290800005197525},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.26899999380111694},{"id":"https://openalex.org/C109986646","wikidata":"https://www.wikidata.org/wiki/Q546113","display_name":"Public policy","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2779777834","wikidata":"https://www.wikidata.org/wiki/Q4202277","display_name":"Enforcement","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2549999952316284},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20449","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20449","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20449","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20449","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6341015100479126,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tool-augmented":[0],"Large":[1],"Language":[2],"Models":[3],"(TaLLMs)":[4],"extend":[5],"LLMs":[6,58],"with":[7,18,45],"the":[8,68,132,135,142,149,156],"ability":[9],"to":[10,16,59,61,91,106,141],"invoke":[11],"external":[12],"tools,":[13],"enabling":[14],"them":[15],"interact":[17],"real-world":[19],"environments.":[20],"However,":[21],"a":[22,40,139],"major":[23],"limitation":[24],"in":[25,28,67,96],"deploying":[26],"TaLLMs":[27],"sensitive":[29],"applications":[30],"such":[31],"as":[32,138],"customer":[33],"service":[34],"and":[35,51,119,129,159,188],"business":[36],"process":[37],"automation":[38],"is":[39],"lack":[41],"of":[42],"reliable":[43],"compliance":[44,95,187],"domain-specific":[46],"operational":[47],"policies":[48,62,110],"regarding":[49],"tool-use":[50,93,109],"agent":[52],"behavior.":[53],"Current":[54],"approaches":[55],"merely":[56],"steer":[57],"adhere":[60],"by":[63],"including":[64],"policy":[65,77,94,150,163,166,186],"descriptions":[66],"LLM":[69],"context,":[70],"but":[71],"these":[72],"provide":[73],"no":[74],"guarantees":[75],"that":[76,147,161,176],"violations":[78,167],"will":[79],"be":[80],"prevented.":[81],"In":[82],"this":[83],"paper,":[84],"we":[85,100],"introduce":[86],"an":[87,102],"SMT":[88],"solver-aided":[89,162],"framework":[90],"enforce":[92],"TaLLM":[97,181],"agents.":[98],"Specifically,":[99],"use":[101],"LLM-assisted,":[103],"human-guided":[104],"approach":[105],"translate":[107],"natural-language-specified":[108],"into":[111,180],"formal":[112,178],"logic":[113],"(SMT-LIB-2.0)":[114],"constraints":[115,133],"over":[116],"agent-observable":[117],"state":[118],"tool":[120,125,143],"arguments.":[121],"At":[122],"runtime,":[123],"planned":[124],"calls":[126],"are":[127,151],"intercepted":[128],"checked":[130],"against":[131],"using":[134],"Z3":[136],"solver":[137],"pre-condition":[140],"call.":[144],"Tool":[145],"invocations":[146],"violate":[148],"blocked.":[152],"We":[153],"evaluated":[154],"on":[155],"TauBench":[157],"benchmark":[158],"demonstrate":[160],"checking":[164],"reduces":[165],"while":[168],"maintaining":[169],"overall":[170,189],"task":[171],"accuracy.":[172],"These":[173],"results":[174],"suggest":[175],"integrating":[177],"reasoning":[179],"execution":[182],"can":[183],"improve":[184],"tool-call":[185],"reliability.":[190]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-25T00:00:00"}
