{"id":"https://openalex.org/W7161763640","doi":"https://doi.org/10.48550/arxiv.2605.19338","title":"STAR-P\u00f3lyaMath: Multi-Agent Reasoning under Persistent Meta-Strategic Supervision","display_name":"STAR-P\u00f3lyaMath: Multi-Agent Reasoning under Persistent Meta-Strategic Supervision","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161763640","doi":"https://doi.org/10.48550/arxiv.2605.19338"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.19338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.19338","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136577283","display_name":"Jiaao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jiaao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136610062","display_name":"Xian Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136560887","display_name":"Hanzhang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hanzhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035369234","display_name":"S. Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Sophia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136587720","display_name":"Yang, Fan, 1980-","orcid":"https://orcid.org/0000-0003-4801-763X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136521423","display_name":"Yinpeng Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yinpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.10700000077486038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.10700000077486038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.10329999774694443,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09040000289678574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5321999788284302},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.499099999666214},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4666999876499176},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.38600000739097595},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.35690000653266907},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.3366999924182892},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.31470000743865967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6187999844551086},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5321999788284302},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.499099999666214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4936999976634979},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4666999876499176},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.38600000739097595},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.35690000653266907},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33869999647140503},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.32510000467300415},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.32359999418258667},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2919999957084656},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.28439998626708984},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2777000069618225},{"id":"https://openalex.org/C2777548347","wikidata":"https://www.wikidata.org/wiki/Q5456937","display_name":"Flagging","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2581999897956848},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2533999979496002},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.251800000667572},{"id":"https://openalex.org/C91306197","wikidata":"https://www.wikidata.org/wiki/Q45767","display_name":"Competition (biology)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.19338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.19338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5970873832702637,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Frontier":[0],"AI":[1],"models":[2],"and":[3,34,55,82,88,101,152,162,164],"multi-agent":[4,45],"systems":[5,22],"have":[6],"led":[7],"to":[8,24],"significant":[9],"improvements":[10],"in":[11,204],"mathematical":[12],"reasoning.":[13],"However,":[14],"for":[15],"problems":[16],"requiring":[17],"extended,":[18],"long-horizon":[19],"reasoning,":[20],"existing":[21],"continue":[23],"suffer":[25],"from":[26,80,189,195],"fundamental":[27],"reliability":[28],"issues:":[29],"hallucination":[30],"accumulation,":[31],"memory":[32,100],"fragmentation,":[33],"imbalanced":[35],"reasoning-tool":[36],"trade-offs.":[37],"In":[38],"this":[39],"paper,":[40],"we":[41],"introduce":[42],"STAR-P\u00f3lyaMath,":[43],"a":[44,73,94],"framework":[46],"that":[47,77,97,185],"systematically":[48],"addresses":[49],"these":[50],"challenges":[51],"through":[52,86],"meta-level":[53,103],"supervision":[54],"structured":[56,61],"Reasoner-Verifier":[57],"interaction.":[58],"STAR-P\u00f3lyaMath":[59,127],"is":[60,93,211],"as":[62],"an":[63],"orchestrated":[64],"state":[65],"machine":[66],"with":[67,175],"nested":[68],"challenge-step-replan":[69],"loops,":[70],"governed":[71],"by":[72,105,177],"reasoning-free":[74],"Python":[75],"orchestrator":[76],"separates":[78],"control":[79,104],"inference":[81],"bounds":[83],"error":[84],"propagation":[85],"trace-back":[87],"re-planning.":[89],"Our":[90],"key":[91,200],"innovation":[92],"persistent":[95],"Meta-Strategist":[96],"maintains":[98],"cross-attempt":[99],"exercises":[102],"issuing":[106],"high-level":[107],"strategic":[108],"guidance":[109],"or":[110,123,202],"mandatory":[111],"directives,":[112],"so":[113],"the":[114,178,186,190],"system":[115],"can":[116],"escape":[117],"unproductive":[118],"loops":[119],"rather":[120,193],"than":[121,194],"stagnate":[122],"over-rely":[124],"on":[125,131,159,169],"tools.":[126],"achieves":[128],"state-of-the-art":[129],"results":[130],"all":[132],"eight":[133],"top-tier":[134],"competition":[135],"benchmarks:":[136],"AIME":[137],"2025-2026,":[138],"MathArena":[139,142],"Apex":[140,143,170],"Shortlist,":[141],"2025,":[144,146,148,171],"Putnam":[145],"IMO":[147],"HMMT":[149],"February":[150],"2026,":[151],"USAMO":[153],"2026.":[154],"It":[155],"obtains":[156],"perfect":[157],"scores":[158],"AIMEs,":[160],"Putnam,":[161],"HMMT,":[163],"shows":[165],"its":[166],"largest":[167],"margin":[168],"scoring":[172],"93.75%":[173],"compared":[174],"80.21%":[176],"strongest":[179],"baseline":[180],"GPT-5.5.":[181],"Ablation":[182],"studies":[183],"show":[184],"gains":[187],"arise":[188],"framework's":[191],"orchestration":[192],"model-level":[196],"diversity":[197],"since":[198],"removing":[199],"components":[201],"substituting":[203],"mixed":[205],"backbones":[206],"consistently":[207],"weakens":[208],"performance.":[209],"Code":[210],"available":[212],"at":[213],"https://github.com/Julius-Woo/STAR-PolyaMath.":[214]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
