{"id":"https://openalex.org/W7139961675","doi":"https://doi.org/10.48550/arxiv.2603.18886","title":"Reasoning over mathematical objects: on-policy reward modeling and test time aggregation","display_name":"Reasoning over mathematical objects: on-policy reward modeling and test time aggregation","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139961675","doi":"https://doi.org/10.48550/arxiv.2603.18886"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18886","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18886","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045442167","display_name":"Pranjal Aggarwal","orcid":"https://orcid.org/0000-0002-2962-1535"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Aggarwal, Pranjal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130251384","display_name":"Marjan Ghazvininejad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghazvininejad, Marjan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014381991","display_name":"Seungone Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Seungone","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103110668","display_name":"Ilia Kulikov","orcid":"https://orcid.org/0000-0001-5989-9733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kulikov, Ilia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016503379","display_name":"Jack Lanchantin","orcid":"https://orcid.org/0000-0003-0811-0944"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lanchantin, Jack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130246882","display_name":"Xian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130222618","display_name":"Tianjian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tianjian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130230820","display_name":"Bo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130232895","display_name":"Graham Neubig","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Neubig, Graham","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064234845","display_name":"Anaelia Ovalle","orcid":"https://orcid.org/0000-0002-0531-7520"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ovalle, Anaelia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057314288","display_name":"Swarnadeep Saha","orcid":"https://orcid.org/0000-0002-6972-3448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saha, Swarnadeep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060255128","display_name":"Sainbayar Sukhbaatar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sukhbaatar, Sainbayar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130245922","display_name":"Sean Welleck","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Welleck, Sean","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130251995","display_name":"Jason Weston","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weston, Jason","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130245001","display_name":"Chenxi Whitehouse","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Whitehouse, Chenxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062696000","display_name":"Adina Williams","orcid":"https://orcid.org/0000-0001-5281-3343"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Williams, Adina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130232408","display_name":"Jing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130242442","display_name":"Ping Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Ping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130228934","display_name":"Weizhe Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Weizhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130245184","display_name":"Jingyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130248602","display_name":"Wenting Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wenting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":21,"corresponding_author_ids":["https://openalex.org/A5045442167"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.3366999924182892,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.3366999924182892,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1436000019311905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.042899999767541885,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7281000018119812},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4918000102043152},{"id":"https://openalex.org/keywords/mathematical-model","display_name":"Mathematical model","score":0.45890000462532043},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.4438000023365021},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.4431000053882599},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.3833000063896179},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3776000142097473}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7281000018119812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6996999979019165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5460000038146973},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4918000102043152},{"id":"https://openalex.org/C76969082","wikidata":"https://www.wikidata.org/wiki/Q486902","display_name":"Mathematical model","level":2,"score":0.45890000462532043},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4431000053882599},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3833000063896179},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3822999894618988},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C130327152","wikidata":"https://www.wikidata.org/wiki/Q748349","display_name":"Mathematical structure","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C66783780","wikidata":"https://www.wikidata.org/wiki/Q1166625","display_name":"Mathematical problem","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C2779193601","wikidata":"https://www.wikidata.org/wiki/Q20026918","display_name":"Mathematical theory","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.26179999113082886}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18886","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18886","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18886","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"ability":[1],"to":[2,52,116],"precisely":[3],"derive":[4],"mathematical":[5,33,69,82],"objects":[6],"is":[7],"a":[8],"core":[9],"requirement":[10],"for":[11,65,80],"downstream":[12],"STEM":[13],"applications,":[14],"including":[15],"mathematics,":[16],"physics,":[17],"and":[18,34,74,78,95,130,154],"chemistry,":[19],"where":[20,97],"reasoning":[21,36,67,161],"must":[22],"culminate":[23],"in":[24],"formally":[25],"structured":[26],"expressions.":[27],"Yet,":[28],"current":[29],"LM":[30],"evaluations":[31],"of":[32,55,160],"scientific":[35],"rely":[37],"heavily":[38],"on":[39,133,151],"simplified":[40],"answer":[41],"formats":[42],"such":[43,127],"as":[44,128],"numerical":[45,153],"values":[46],"or":[47],"multiple":[48],"choice":[49],"options":[50],"due":[51],"the":[53,84],"convenience":[54],"automated":[56],"assessment.":[57],"In":[58],"this":[59],"paper":[60],"we":[61,72,88,98,107],"provide":[62,89],"three":[63],"contributions":[64],"improving":[66,149],"over":[68,143],"objects:":[70],"(i)":[71],"build":[73],"release":[75],"training":[76,90,103,111,137],"data":[77],"benchmarks":[79],"deriving":[81],"objects,":[83],"Principia":[85],"suite;":[86],"(ii)":[87],"recipes":[91,138],"with":[92],"strong":[93,125],"LLM-judges":[94],"verifiers,":[96],"show":[99,108],"that":[100,124],"on-policy":[101,110],"judge":[102],"boosts":[104],"performance;":[105],"(iii)":[106],"how":[109],"can":[112,139],"also":[113],"be":[114],"used":[115],"scale":[117],"test-time":[118],"compute":[119],"via":[120],"aggregation.":[121],"We":[122],"find":[123],"LMs":[126],"Qwen3-235B":[129],"o3":[131],"struggle":[132],"Principia,":[134],"while":[135,147],"our":[136],"bring":[140],"significant":[141],"improvements":[142],"different":[144],"LLM":[145],"backbones,":[146],"simultaneously":[148],"results":[150],"existing":[152],"MCQA":[155],"tasks,":[156],"demonstrating":[157],"cross-format":[158],"generalization":[159],"abilities.":[162]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-21T00:00:00"}
