{"id":"https://openalex.org/W7117304657","doi":"https://doi.org/10.48550/arxiv.2512.20660","title":"The Dual-State Architecture for Reliable LLM Agents","display_name":"The Dual-State Architecture for Reliable LLM Agents","publication_year":2025,"publication_date":"2025-12-18","ids":{"openalex":"https://openalex.org/W7117304657","doi":"https://doi.org/10.48550/arxiv.2512.20660"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.20660","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20660","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.20660","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004472412","display_name":"Matthew W. Thompson","orcid":"https://orcid.org/0000-0002-1460-3983"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Thompson, Matthew","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5004472412"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.25110000371932983,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.25110000371932983,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.1639000028371811,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.07940000295639038,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.48750001192092896},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.48100000619888306},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.44699999690055847},{"id":"https://openalex.org/keywords/unit-testing","display_name":"Unit testing","score":0.4212999939918518},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4194999933242798},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.41929998993873596},{"id":"https://openalex.org/keywords/source-lines-of-code","display_name":"Source lines of code","score":0.41909998655319214},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.38269999623298645},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.3804999887943268}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6669999957084656},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5705000162124634},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.48750001192092896},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.48100000619888306},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.44699999690055847},{"id":"https://openalex.org/C148027188","wikidata":"https://www.wikidata.org/wiki/Q907375","display_name":"Unit testing","level":3,"score":0.4212999939918518},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4194999933242798},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.41929998993873596},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.41909998655319214},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.38269999623298645},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3804999887943268},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.37130001187324524},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.3328000009059906},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.3077999949455261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2969000041484833},{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2797999978065491},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C174683762","wikidata":"https://www.wikidata.org/wiki/Q609588","display_name":"Component-based software engineering","level":4,"score":0.2644999921321869},{"id":"https://openalex.org/C4478048","wikidata":"https://www.wikidata.org/wiki/Q950250","display_name":"Test-driven development","level":4,"score":0.2614000141620636},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.20660","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20660","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.20660","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20660","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5975728631019592,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"deployed":[3],"as":[4,41],"code":[5],"generation":[6,33,169,174],"agents":[7],"exhibit":[8],"stochastic":[9,32,63],"behavior":[10],"incompatible":[11],"with":[12,34,61,102,161],"the":[13,22,182],"deterministic":[14,35,58],"guarantees":[15],"required":[16],"by":[17],"software":[18,199],"engineering.":[19,200],"We":[20,65],"formalize":[21],"Dual-State":[23],"Action":[24],"Pair":[25],"(DSAP),":[26],"an":[27],"execution":[28,185,190],"primitive":[29],"that":[30,44,67],"couples":[31],"post-condition":[36],"verification.":[37],"Guard":[38],"functions":[39],"act":[40],"sensing":[42],"actions":[43],"project":[45],"opaque":[46],"LLM":[47],"outputs":[48],"onto":[49],"observable":[50],"workflow":[51],"state,":[52],"enabling":[53],"a":[54,89],"dual-state":[55],"decomposition:":[56],"finite,":[57],"S_workflow":[59],"paired":[60],"infinite,":[62],"S_env.":[64],"prove":[66],"for":[68,167,172,197],"epsilon-capable":[69],"generators,":[70],"failure":[71],"probability":[72],"P(fail)":[73],"&lt;=":[74],"(1-epsilon)^R_max":[75],"-&gt;":[76],"0.":[77],"To":[78],"prevent":[79],"naive":[80],"O(R^K)":[81],"retry":[82],"explosion":[83],"across":[84,116],"multi-step":[85],"workflows,":[86],"we":[87],"introduce":[88],"three-level":[90],"recovery":[91,163,191],"hierarchy:":[92],"context":[93,106,150],"refinement":[94],"(retry":[95],"within":[96],"step),":[97],"informed":[98],"backtracking":[99],"(stagnation":[100],"detection":[101],"cascade":[103],"invalidation":[104],"and":[105,111,176,187],"injection":[107,151],"to":[108,130],"upstream":[109],"steps),":[110],"human":[112],"escalation.":[113],"Experimental":[114],"validation":[115],"13":[117],"LLMs":[118],"(1.3B-15B":[119],"parameters)":[120],"on":[121,141],"three":[122],"diagnostic":[123],"probes":[124],"demonstrates":[125,148],"reliability":[126],"gains":[127],"of":[128],"up":[129],"66":[131],"percentage":[132],"points":[133],"at":[134],"1.2-2.1x":[135],"baseline":[136],"cost.":[137],"Recovery":[138],"mechanism":[139],"evaluation":[140],"99":[142],"SWE-Bench":[143],"Pro":[144],"instance-arm":[145],"pairs":[146],"(Qwen3-Coder-Next)":[147],"100%":[149],"effectiveness":[152],"(upstream":[153],"output":[154],"changed":[155],"in":[156],"all":[157],"71":[158],"escalation":[159],"events)":[160],"step-specific":[162],"asymmetry":[164],"--":[165,175],"37.5%":[166],"test":[168],"vs.":[170],"0%":[171,177],"patch":[173,179],"end-to-end":[178],"production,":[180],"establishing":[181],"boundary":[183],"between":[184],"architecture":[186],"plan":[188],"synthesis:":[189],"is":[192],"necessary":[193],"but":[194],"not":[195],"sufficient":[196],"autonomous":[198]},"counts_by_year":[],"updated_date":"2026-03-31T06:02:25.137627","created_date":"2025-12-26T00:00:00"}
