{"id":"https://openalex.org/W7161712591","doi":"https://doi.org/10.48550/arxiv.2605.17291","title":"Step-wise Rubric Rewards for LLM Reasoning","display_name":"Step-wise Rubric Rewards for LLM Reasoning","publication_year":2026,"publication_date":"2026-05-17","ids":{"openalex":"https://openalex.org/W7161712591","doi":"https://doi.org/10.48550/arxiv.2605.17291"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17291","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136454645","display_name":"Weichu Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Weichu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136456058","display_name":"Haozhe Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Haozhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136479148","display_name":"Wenpu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wenpu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136482371","display_name":"Yongfu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yongfu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136477310","display_name":"Liang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136486682","display_name":"Minghao Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Minghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136496741","display_name":"Zirong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zirong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136478671","display_name":"Yuqi Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yuqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136458617","display_name":"Shuai Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Shuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136460514","display_name":"Ziyue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ziyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136464005","display_name":"Xinbo Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xinbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020080521","display_name":"Kean Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Kean","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136455097","display_name":"Ruoyu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Ruoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136484208","display_name":"Xiaoying Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaoying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136499313","display_name":"Wenqi Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Wenqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136502353","display_name":"Baobao Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Baobao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136488634","display_name":"Nan Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Nan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136488579","display_name":"Jiaqi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5863000154495239,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5863000154495239,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09520000219345093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.058400001376867294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.972599983215332},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7009999752044678},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5020999908447266},{"id":"https://openalex.org/keywords/outcome","display_name":"Outcome (game theory)","score":0.49959999322891235},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4627000093460083},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4359000027179718}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.972599983215332},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7009999752044678},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6187000274658203},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5020999908447266},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.49959999322891235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46320000290870667},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4627000093460083},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39959999918937683},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3555000126361847},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.28349998593330383},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2824000120162964},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27309998869895935},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2669999897480011}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6821322441101074,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"with":[2,21,156],"Verifiable":[3],"Rewards":[4,33,112],"(RLVR)":[5],"is":[6],"widely":[7],"used":[8],"to":[9,56,123,128,225,233],"improve":[10],"reasoning":[11,131,187,197],"in":[12,88,100],"large":[13],"language":[14],"models,":[15],"but":[16],"rewards":[17],"only":[18,141],"final-answer":[19],"correctness":[20],"no":[22],"supervision":[23,37,68],"over":[24,203],"intermediate":[25],"steps.":[26],"Rubric-based":[27],"methods":[28],"such":[29],"as":[30,32,111],"Rubrics":[31,110],"(RaR)":[34],"introduce":[35,108],"finer-grained":[36],"by":[38,178,205],"scoring":[39],"rollouts":[40,139],"against":[41],"structured":[42],"criteria,":[43],"yet":[44,93,105],"the":[45,57,153,157,167,216],"rubric":[46,126,136,176,181],"scores":[47,137],"are":[48,91,103],"still":[49],"aggregated":[50],"into":[51],"a":[52,129,147,161,174,191],"single":[53],"scalar":[54],"applied":[55],"entire":[58],"response,":[59],"causing":[60],"three":[61],"weaknesses:":[62],"loss":[63],"of":[64,69,86,98],"multi-criterion":[65],"structure,":[66],"uniform":[67],"correct":[70,104,184],"and":[71,74,150,185,210,227],"incorrect":[72],"steps,":[73],"reward":[75,155,159],"hacking":[76],"through":[77,160],"unbounded":[78],"self-correction.":[79],"On":[80],"1,000":[81],"problems,":[82],"we":[83],"find":[84],"18.2%":[85],"steps":[87,99,142],"correct-answer":[89],"responses":[90,102],"wrong":[92],"positively":[94],"rewarded,":[95],"while":[96],"49.9%":[97],"incorrect-answer":[101],"penalized.":[106],"We":[107,171],"Step-wise":[109],"(SRaR),":[113],"an":[114,120],"RLVR":[115],"framework":[116],"that":[117,165],"(i)":[118],"uses":[119],"LLM":[121],"judge":[122],"attribute":[124],"each":[125],"item":[127],"specific":[130],"step,":[132],"(ii)":[133],"normalizes":[134],"per-step":[135,154],"across":[138],"so":[140],"whose":[143],"quality":[144],"varies":[145],"produce":[146],"learning":[148],"signal,":[149],"(iii)":[151],"combines":[152],"outcome":[158,168],"decoupled":[162],"advantage":[163],"estimator":[164],"keeps":[166],"baseline":[169],"stable.":[170],"further":[172],"build":[173],"16K-problem":[175],"dataset":[177],"contrastively":[179],"distilling":[180],"items":[182],"from":[183,190,223,231],"flawed":[186],"paths":[188],"sampled":[189],"strong":[192],"model.":[193],"Across":[194],"six":[195],"mathematical":[196],"benchmarks,":[198],"SRaR":[199],"improves":[200],"average":[201],"accuracy":[202],"RaR":[204],"3.57":[206],"points":[207,212],"on":[208,213,220],"Qwen3-8B":[209],"2.75":[211],"Qwen3-32B,":[214],"raises":[215],"Faithful":[217],"Reasoning":[218],"Rate":[219],"AIME":[221],"2025":[222],"34.5%":[224],"46.7%,":[226],"reduces":[228],"self-correction":[229],"looping":[230],"48.1%":[232],"26.5%.":[234]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
