{"id":"https://openalex.org/W7139935011","doi":"https://doi.org/10.48550/arxiv.2603.18614","title":"ZEBRAARENA: A Diagnostic Simulation Environment for Studying Reasoning-Action Coupling in Tool-Augmented LLMs","display_name":"ZEBRAARENA: A Diagnostic Simulation Environment for Studying Reasoning-Action Coupling in Tool-Augmented LLMs","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139935011","doi":"https://doi.org/10.48550/arxiv.2603.18614"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18614","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112946948","display_name":"Wanjia Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhao, Wanjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130226593","display_name":"Ludwig Schmidt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schmidt, Ludwig","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130225259","display_name":"James Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012491878","display_name":"Vidhisha Balachandran","orcid":"https://orcid.org/0009-0009-0465-0098"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balachandran, Vidhisha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130242110","display_name":"Lingjiao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Lingjiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5112946948"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20730000734329224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20730000734329224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.15320000052452087,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.12070000171661377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.689300000667572},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.6884999871253967},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6687999963760376},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6553000211715698},{"id":"https://openalex.org/keywords/coupling","display_name":"Coupling (piping)","score":0.5631999969482422},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4350999891757965}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7009000182151794},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.689300000667572},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.6884999871253967},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6687999963760376},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6553000211715698},{"id":"https://openalex.org/C131584629","wikidata":"https://www.wikidata.org/wiki/Q4308705","display_name":"Coupling (piping)","level":2,"score":0.5631999969482422},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43619999289512634},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4350999891757965},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.38769999146461487},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3817000091075897},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.329800009727478},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2581000030040741},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tool-augmented":[0],"large":[1],"language":[2],"models":[3,132],"(LLMs)":[4],"must":[5],"tightly":[6],"couple":[7],"multi-step":[8],"reasoning":[9,119,131,192],"with":[10,20,47],"external":[11,85,122,194],"actions,":[12],"yet":[13],"existing":[14],"benchmarks":[15],"often":[16],"confound":[17],"this":[18,30],"interplay":[19,189],"complex":[21],"environment":[22,39],"dynamics,":[23],"memorized":[24],"knowledge":[25],"or":[26,59],"dataset":[27,60],"contamination.":[28,61],"In":[29],"paper,":[31],"we":[32],"introduce":[33],"ZebraArena,":[34],"a":[35,51,67,100,115,127,151],"procedurally":[36],"generated":[37],"diagnostic":[38],"for":[40,105],"studying":[41],"reasoning-action":[42],"coupling":[43],"in":[44,64,178],"tool-augmented":[45],"LLMs,":[46],"controllable":[48],"difficulty":[49],"and":[50,88,99,120,136,157,181,193],"knowledge-minimal":[52],"design,":[53],"which":[54,72,125],"limits":[55],"gains":[56],"from":[57],"memorization":[58],"Each":[62],"task":[63],"ZebraArena":[65,113,183],"requires":[66,114],"set":[68],"of":[69,117],"critical":[70],"information":[71,86],"is":[73],"available":[74],"only":[75,140],"through":[76],"targeted":[77],"tool":[78,108,123,159,167],"use,":[79],"yielding":[80],"an":[81],"interpretable":[82],"interface":[83],"between":[84,154,190],"acquisition":[87],"deductive":[89],"reasoning.":[90],"This":[91],"design":[92],"provides":[93],"deterministic":[94],"evaluation":[95],"via":[96],"unique":[97],"solutions,":[98],"theoretical":[101,155,171],"optimal":[102],"query":[103],"count":[104],"measuring":[106],"efficient":[107],"use.":[109],"We":[110,148,173],"show":[111],"that":[112],"combination":[116],"in-depth":[118],"accurate":[121],"calling,":[124],"remains":[126],"challenge":[128],"as":[129,134],"frontier":[130],"such":[133],"GPT-5":[135,163],"Gemini":[137],"2.5":[138],"Pro":[139],"achieves":[141],"60%":[142],"accuracy":[143],"on":[144,187],"the":[145,170,175,188],"hard":[146],"instances.":[147],"also":[149],"observe":[150],"persistent":[152],"gaps":[153],"optimality":[156],"practical":[158],"usage.":[160],"For":[161],"example,":[162],"uses":[164],"70-270%":[165],"more":[166],"calls":[168],"than":[169],"optimum.":[172],"highlight":[174],"key":[176],"findings":[177],"our":[179],"evaluation,":[180],"hope":[182],"stimulates":[184],"further":[185],"research":[186],"internal":[191],"action.":[195]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-21T00:00:00"}
