{"id":"https://openalex.org/W7160286454","doi":"https://doi.org/10.48550/arxiv.2605.01203","title":"GR-Ben: A General Reasoning Benchmark for Evaluating Process Reward Models","display_name":"GR-Ben: A General Reasoning Benchmark for Evaluating Process Reward Models","publication_year":2026,"publication_date":"2026-05-02","ids":{"openalex":"https://openalex.org/W7160286454","doi":"https://doi.org/10.48550/arxiv.2605.01203"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.01203","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01203","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.01203","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100584977","display_name":"Zhouhao Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sun, Zhouhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135313319","display_name":"Xuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135330350","display_name":"Xiao Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135354265","display_name":"Bibo Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Bibo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135323812","display_name":"Li Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135318491","display_name":"Kai Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135401778","display_name":"Xinran Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Xinran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135289681","display_name":"Fei Zhang","orcid":"https://orcid.org/0000-0002-1194-8513"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Fei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135302079","display_name":"weidi tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"tang, weidi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120558250","display_name":"Zhiyuan Kan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kan, Zhiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135350966","display_name":"Yang Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135414597","display_name":"Bing Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Bing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135300223","display_name":"Ting Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5100584977"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4074999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4074999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.052400000393390656,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.050700001418590546,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7664999961853027},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6159999966621399},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.597599983215332},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5759999752044678},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5105999708175659},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.45660001039505005},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.40459999442100525}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7664999961853027},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6916000247001648},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6159999966621399},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.597599983215332},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5759999752044678},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5105999708175659},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4864000082015991},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.45660001039505005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45010000467300415},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.3921000063419342},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.3743000030517578},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2994999885559082},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2662000060081482},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.01203","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01203","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.01203","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01203","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5271522402763367,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Currently,":[0],"process":[1],"reward":[2],"models":[3,15],"(PRMs)":[4],"have":[5],"exhibited":[6],"remarkable":[7],"potential":[8],"for":[9,39,81,167],"test-time":[10],"scaling.":[11],"Since":[12],"large":[13],"language":[14],"(LLMs)":[16],"regularly":[17],"generate":[18],"flawed":[19],"intermediate":[20],"reasoning":[21,29,67,88,173],"steps":[22],"when":[23],"tackling":[24],"a":[25,76,101],"broad":[26],"spectrum":[27],"of":[28,63,104,126,175],"and":[30,91,93,110,112,129],"decision-making":[31],"tasks,":[32],"PRMs":[33,64,109,128,141,166],"are":[34,142],"required":[35],"to":[36,56,133],"possess":[37],"capabilities":[38,174],"detecting":[40,155],"process-level":[41,77],"errors":[42],"in":[43,154],"real-world":[44],"scenarios.":[45,68],"However,":[46],"existing":[47,127],"benchmarks":[48],"primarily":[49],"focus":[50],"on":[51,100,165],"mathematical":[52,121],"reasoning,":[53,122],"thereby":[54,170],"failing":[55],"comprehensively":[57],"evaluate":[58],"the":[59,123,172],"error":[60],"detection":[61],"ability":[62,125],"across":[65,85],"diverse":[66,102],"To":[69],"mitigate":[70],"this":[71],"gap,":[72],"we":[73],"introduce":[74],"GR-Ben,":[75],"benchmark":[78],"specifically":[79],"designed":[80],"assessing":[82],"PRM's":[83],"performance":[84,153],"two":[86,114],"primary":[87],"domains":[89,119],"(science":[90],"logic)":[92],"nine":[94],"subdomains.":[95],"We":[96,158],"conduct":[97],"extensive":[98],"experiments":[99],"set":[103],"22":[105],"models,":[106],"encompassing":[107],"both":[108],"LLMs,":[111],"derive":[113],"key":[115],"findings:":[116],"(1)":[117],"In":[118,139],"beyond":[120],"error-detection":[124],"LLMs":[130,150],"is":[131],"found":[132],"be":[134],"markedly":[135],"weaker":[136],"by":[137],"comparison.(2)":[138],"general,":[140],"less":[143],"adept":[144],"at":[145],"identifying":[146],"knowledge-based":[147],"errors,":[148],"whereas":[149],"exhibit":[151],"poorer":[152],"computational":[156],"errors.":[157],"hope":[159],"GR-Ben":[160],"can":[161],"foster":[162],"future":[163],"researches":[164],"general":[168],"domains,":[169],"enhancing":[171],"LLMs.":[176]},"counts_by_year":[],"updated_date":"2026-05-09T06:09:20.037420","created_date":"2026-05-06T00:00:00"}
