{"id":"https://openalex.org/W7138081061","doi":"https://doi.org/10.48550/arxiv.2603.15255","title":"SAGE: Multi-Agent Self-Evolution for LLM Reasoning","display_name":"SAGE: Multi-Agent Self-Evolution for LLM Reasoning","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138081061","doi":"https://doi.org/10.48550/arxiv.2603.15255"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15255","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129729858","display_name":"Yulin Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Peng, Yulin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129726269","display_name":"Xinxin Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xinxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129656429","display_name":"Chenxing Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Chenxing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125726023","display_name":"Nianbo Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Nianbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129670511","display_name":"Leilei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Leilei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062558316","display_name":"Yong He","orcid":"https://orcid.org/0000-0003-4062-9201"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Ying Tiffany","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129714932","display_name":"F. Richard Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, F. Richard","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129729858"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2800000011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2800000011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10849999636411667,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.70169997215271},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5429999828338623},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.5320000052452087},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5264999866485596},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5254999995231628},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.47350001335144043},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.444599986076355},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.43290001153945923},{"id":"https://openalex.org/keywords/problem-solver","display_name":"Problem solver","score":0.42910000681877136},{"id":"https://openalex.org/keywords/non-monotonic-logic","display_name":"Non-monotonic logic","score":0.4221000075340271}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.70169997215271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.652899980545044},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5429999828338623},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.5320000052452087},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5264999866485596},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5254999995231628},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5181999802589417},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.47350001335144043},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C3019612716","wikidata":"https://www.wikidata.org/wiki/Q730920","display_name":"Problem solver","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.4221000075340271},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.399399995803833},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C168773036","wikidata":"https://www.wikidata.org/wiki/Q264164","display_name":"Recursion (computer science)","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C164155591","wikidata":"https://www.wikidata.org/wiki/Q2067766","display_name":"Satisfiability modulo theories","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2831999957561493},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26159998774528503},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15255","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15255","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.46001747250556946,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"with":[2],"verifiable":[3],"rewards":[4],"improves":[5],"reasoning":[6,48],"in":[7,37],"large":[8,18],"language":[9],"models":[10],"(LLMs),":[11],"but":[12],"many":[13],"methods":[14],"still":[15],"rely":[16],"on":[17,147,151],"human-labeled":[19],"datasets.":[20],"While":[21],"self-play":[22],"reduces":[23],"this":[24],"dependency,":[25],"it":[26],"often":[27],"lacks":[28],"explicit":[29],"planning":[30],"and":[31,59,90,110,115,121,131,149],"strong":[32],"quality":[33],"control,":[34],"limiting":[35],"stability":[36],"long-horizon":[38],"multi-step":[39,88],"reasoning.":[40],"We":[41],"present":[42],"SAGE":[43,134],"(Self-evolving":[44],"Agents":[45],"for":[46],"Generalized":[47],"Evolution),":[49],"a":[50,63,69,86],"closed-loop":[51],"framework":[52],"where":[53],"four":[54],"agents:":[55],"Challenger,":[56],"Planner,":[57],"Solver,":[58],"Critic,":[60],"co-evolve":[61],"from":[62],"shared":[64],"LLM":[65],"backbone":[66],"using":[67],"only":[68],"small":[70],"seed":[71],"set.":[72],"The":[73,107],"Challenger":[74],"continuously":[75],"generates":[76],"increasingly":[77],"difficult":[78],"tasks;":[79],"the":[80,91,94,142],"Planner":[81],"converts":[82],"each":[83],"task":[84],"into":[85],"structured":[87],"plan;":[89],"Solver":[92],"follows":[93],"plan":[95],"to":[96,117],"produce":[97],"an":[98],"answer,":[99],"whose":[100],"correctness":[101],"is":[102],"determined":[103],"by":[104,145],"external":[105],"verifiers.":[106],"Critic":[108],"scores":[109],"filters":[111],"both":[112],"generated":[113],"questions":[114],"plans":[116],"prevent":[118],"curriculum":[119],"drift":[120],"maintain":[122],"training":[123],"signal":[124],"quality,":[125],"enabling":[126],"stable":[127],"self-training.":[128],"Across":[129],"mathematics":[130],"code-generation":[132],"benchmarks,":[133],"delivers":[135],"consistent":[136],"gains":[137],"across":[138],"model":[139,144],"scales,":[140],"improving":[141],"Qwen-2.5-7B":[143],"8.9%":[146],"LiveCodeBench":[148],"10.7%":[150],"OlympiadBench.":[152]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
