{"id":"https://openalex.org/W7163027430","doi":"https://doi.org/10.48550/arxiv.2605.30621","title":"Harness Updating Is Not Harness Benefit: Disentangling Evolution Capabilities in Self-Evolving LLM Agents","display_name":"Harness Updating Is Not Harness Benefit: Disentangling Evolution Capabilities in Self-Evolving LLM Agents","publication_year":2026,"publication_date":"2026-05-28","ids":{"openalex":"https://openalex.org/W7163027430","doi":"https://doi.org/10.48550/arxiv.2605.30621"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.30621","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30621","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.30621","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130236295","display_name":"Minhua Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Minhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137563377","display_name":"Juncheng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Juncheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137545972","display_name":"Zijun Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zijun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137607334","display_name":"Zhan Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Zhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078860840","display_name":"Yisi Sang","orcid":"https://orcid.org/0000-0002-8876-7542"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Yisi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100654849","display_name":"Bing He","orcid":"https://orcid.org/0000-0001-5304-5251"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Bing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137559621","display_name":"Zewen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zewen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137612250","display_name":"Tianxin Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Tianxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137538590","display_name":"Zongyu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zongyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137603238","display_name":"Zhiwei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137522046","display_name":"Dakuo Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Dakuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137544277","display_name":"xiang zhang","orcid":"https://orcid.org/0000-0002-1017-742X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137581706","display_name":"Benoit Dumoulin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dumoulin, Benoit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137560670","display_name":"Cihang Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Cihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137544645","display_name":"Yuyin Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yuyin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011048500","display_name":"Suhang Wang","orcid":"https://orcid.org/0000-0003-3448-4878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Suhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130230456","display_name":"Hanqing Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Hanqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.29409998655319214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.29409998655319214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.09179999679327011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.06620000302791595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6654000282287598},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.45750001072883606},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.45239999890327454},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.4397999942302704},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.43149998784065247},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.37369999289512634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6800000071525574},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6654000282287598},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.45750001072883606},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.45239999890327454},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.4397999942302704},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.43149998784065247},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3564999997615814},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3443000018596649},{"id":"https://openalex.org/C2777093003","wikidata":"https://www.wikidata.org/wiki/Q6508345","display_name":"Lead (geology)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2655999958515167}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.30621","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30621","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.30621","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30621","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM":[0],"agents":[1,30],"are":[2],"increasingly":[3],"deployed":[4],"as":[5],"systems":[6],"built":[7],"around":[8],"editable":[9],"external":[10],"harnesses,":[11,148],"including":[12],"prompts,":[13],"skills,":[14],"memories":[15],"and":[16,61,153,204,208],"tools,":[17],"that":[18,118],"shape":[19],"task":[20,96],"execution":[21,36,84],"without":[22],"changing":[23],"model":[24],"parameters.":[25],"Harness":[26],"self-evolution":[27,71],"adapts":[28],"such":[29],"by":[31],"updating":[32],"these":[33],"harnesses":[34,94],"from":[35,65,83,92,111,146],"evidence.":[37],"Yet":[38],"it":[39],"remains":[40],"unclear":[41],"whether":[42],"a":[43],"model's":[44],"base":[45,108,140],"capability":[46,76,89,113,194],"in":[47,52,107,139,196,212],"task-solving":[48,198],"predicts":[49],"its":[50],"capabilities":[51],"harness":[53,59,70,81,116,179,206],"self-evolution:":[54],"which":[55,62],"models":[56,110,143,150,155,173],"produce":[57,78,115],"useful":[58,79],"updates,":[60],"actually":[63],"benefit":[64,91,144,151,156],"them?":[66],"We":[67,160],"analyze":[68],"two":[69,101,169],"capabilities:":[72],"(i)":[73],"harness-updating,":[74],"the":[75,88,165,197,202],"to":[77,90,120,130,168,176,186],"persistent":[80],"updates":[82,117,126],"evidence;":[85],"(ii)":[86],"harness-benefit,":[87],"updated":[93,147],"during":[95],"solving.":[97],"Our":[98,215],"analysis":[99],"reveals":[100],"findings.":[102],"First,":[103],"harness-updating":[104],"is":[105,137,218],"flat":[106],"capability:":[109,141],"different":[112],"tiers":[114],"lead":[119],"surprisingly":[121],"similar":[122],"gains;":[123],"even":[124],"Qwen3.5-9B's":[125],"yield":[127],"gains":[128,163],"comparable":[129],"those":[131],"of":[132],"Claude":[133],"Opus~4.6.":[134],"Second,":[135],"harness-benefit":[136],"non-monotonic":[138],"weak-tier":[142,172],"little":[145],"mid-tier":[149],"most,":[152],"strong-tier":[154],"less":[157],"than":[158,201],"mid-tier.":[159],"trace":[161],"low":[162],"at":[164,221],"weak":[166],"tier":[167],"failure":[170],"modes:":[171],"may":[174],"fail":[175,185],"activate":[177,182],"relevant":[178],"artifacts,":[180],"or":[181],"them":[183,188],"but":[184],"follow":[187],"faithfully.":[189],"These":[190],"findings":[191],"suggest":[192],"investing":[193],"budget":[195],"agent":[199,213],"rather":[200],"evolver,":[203],"targeting":[205],"invocation":[207],"long-horizon":[209],"instruction":[210],"following":[211],"training.":[214],"source":[216],"code":[217],"publicly":[219],"available":[220],"https://github.com/A-EVO-Lab/a-evolve/tree/release/harness-evolution.":[222]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-06-02T00:00:00"}
