{"id":"https://openalex.org/W7152375443","doi":"https://doi.org/10.48550/arxiv.2604.07236","title":"How Much Heavy Lifting Can an Agent Harness Do?: Measuring the LLM's Residual Role in a Planning Agent","display_name":"How Much Heavy Lifting Can an Agent Harness Do?: Measuring the LLM's Residual Role in a Planning Agent","publication_year":2026,"publication_date":"2026-04-08","ids":{"openalex":"https://openalex.org/W7152375443","doi":"https://doi.org/10.48550/arxiv.2604.07236"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.07236","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07236","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.07236","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jung, Sungwoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jung, Sungwoo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133278567","display_name":"Seonil Son","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Son, Seonil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.2053000032901764,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.2053000032901764,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.186599999666214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.08839999884366989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reflection","display_name":"Reflection (computer programming)","score":0.46000000834465027},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.4413999915122986},{"id":"https://openalex.org/keywords/competence","display_name":"Competence (human resources)","score":0.42809998989105225},{"id":"https://openalex.org/keywords/metaprogramming","display_name":"Metaprogramming","score":0.3488999903202057},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.31130000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7014999985694885},{"id":"https://openalex.org/C65682993","wikidata":"https://www.wikidata.org/wiki/Q1056451","display_name":"Reflection (computer programming)","level":2,"score":0.46000000834465027},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.4413999915122986},{"id":"https://openalex.org/C100521375","wikidata":"https://www.wikidata.org/wiki/Q2015382","display_name":"Competence (human resources)","level":2,"score":0.42809998989105225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3926999866962433},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3513999879360199},{"id":"https://openalex.org/C35390924","wikidata":"https://www.wikidata.org/wiki/Q661075","display_name":"Metaprogramming","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.337799996137619},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.30959999561309814},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C128913409","wikidata":"https://www.wikidata.org/wiki/Q3566063","display_name":"Belief revision","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.07236","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07236","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.07236","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07236","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.525303304195404,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Agent":[0],"harnesses":[1],"--":[2,19,82,96],"the":[3,55,66,105,117,123,133,191],"stateful":[4],"programs":[5],"that":[6,161],"wrap":[7],"a":[8,28,39,70,98,140,176],"language":[9],"model":[10,30],"and":[11,60,91,108,112,165],"decide":[12],"what":[13],"it":[14,45],"sees":[15],"at":[16],"each":[17],"step":[18],"are":[20,187],"now":[21],"known":[22],"to":[23,122,158],"change":[24],"end-to-end":[25],"performance":[26],"on":[27,163,170],"fixed":[29],"by":[31],"as":[32,34,104,110,116,197],"much":[33,49,62],"six":[35],"times.":[36],"That":[37],"raises":[38],"question":[40],"asked":[41],"less":[42],"often":[43],"than":[44,200],"should":[46],"be:":[47],"how":[48,61],"of":[50,173],"an":[51,92],"agent's":[52],"competence":[53],"does":[54],"harness":[56,72,185],"itself":[57],"already":[58],"carry,":[59],"genuinely":[63],"still":[64],"needs":[65],"LLM?":[67],"We":[68],"externalize":[69],"planning":[71],"for":[73],"noisy":[74],"Collaborative":[75],"Battleship":[76],"into":[77],"four":[78],"progressively":[79],"richer":[80],"layers":[81,186],"posterior":[83],"belief":[84],"tracking,":[85],"declarative":[86,129],"planning,":[87],"symbolic":[88,146],"reflec":[89],"tion,":[90],"LLM-backed":[93,166],"revision":[94,167],"gate":[95],"under":[97],"common":[99],"runtime,":[100],"taking":[101],"\\emph{win":[102],"rate}":[103],"primary":[106,124],"metric":[107],"\\emph{F1}":[109],"secondary,":[111],"pre-specifying":[113],"\\emph{heavy":[114],"lifting}":[115],"single":[118],"largest":[119],"positive":[120],"marginal":[121],"metric.":[125],"Across":[126],"54":[127],"games,":[128],"pla":[130],"nning":[131],"carries":[132],"heavy":[134],"lifting":[135],"($+24.1$pp":[136],"win":[137],"rate":[138],"over":[139],"belief-only":[141],"harness,":[142],"zero":[143],"LLM":[144],"calls);":[145],"reflection":[147],"is":[148,182],"mechanistically":[149],"real":[150],"but":[151],"calibration-sensitive,":[152],"with":[153,175],"signed":[154],"board-level":[155],"effects":[156],"up":[157],"$\\pm0.140$":[159],"F1":[160],"cancel":[162],"aggregate;":[164],"ac":[168],"tivates":[169],"only":[171],"$4.3\\%$":[172],"turns":[174],"bounded,":[177],"non-monotonic":[178],"effect.":[179],"The":[180],"contribution":[181],"methodological:":[183],"once":[184],"made":[188],"externally":[189],"measurable,":[190],"LLM's":[192],"role":[193],"can":[194],"be":[195],"quantified":[196],"residual":[198],"rather":[199],"assumed":[201],"central.":[202]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-10T00:00:00"}
