{"id":"https://openalex.org/W7138910657","doi":"https://doi.org/10.1609/aaai.v40i41.40797","title":"GenPRM: Scaling Test-Time Compute of Process Reward Models via Generative Reasoning","display_name":"GenPRM: Scaling Test-Time Compute of Process Reward Models via Generative Reasoning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138910657","doi":"https://doi.org/10.1609/aaai.v40i41.40797"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i41.40797","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40797","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40797/44758","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40797/44758","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078587654","display_name":"Jian Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jian Zhao","raw_affiliation_strings":["Tsinghua University\nBeijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Tsinghua University\nBeijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075239885","display_name":"Runze Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runze Liu","raw_affiliation_strings":["Tsinghua University\nShanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Tsinghua University\nShanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I99065089","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101999478","display_name":"Kaiyan Zhang","orcid":"https://orcid.org/0000-0002-8059-1124"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiyan Zhang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130100697","display_name":"Zhimu Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhimu Zhou","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102621152","display_name":"Junqi Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junqi Gao","raw_affiliation_strings":["Harbin Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130052593","display_name":"Dong Li","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Li","raw_affiliation_strings":["Harbin Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067453635","display_name":"Jiafei Lyu","orcid":"https://orcid.org/0000-0001-6616-417X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiafei Lyu","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065103659","display_name":"Zhouyi Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhouyi Qian","raw_affiliation_strings":["Harbin Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129789245","display_name":"Biqing Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Biqing Qi","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353867","display_name":"Xin Li","orcid":"https://orcid.org/0000-0002-4370-2876"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Li","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101803493","display_name":"Bin Zhou","orcid":"https://orcid.org/0000-0003-4245-0694"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bowen Zhou","raw_affiliation_strings":["Tsinghua University\nShanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Tsinghua University\nShanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I99065089","https://openalex.org/I4391012619"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5078587654"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.82119914,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"41","first_page":"34932","last_page":"34940"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.18809999525547028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.18809999525547028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1712000072002411,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.1446000039577484,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.670799970626831},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5823000073432922},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5688999891281128},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.557699978351593},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5130000114440918},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.45559999346733093},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.423799991607666},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4058000147342682}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368000149726868},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.670799970626831},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5823000073432922},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5688999891281128},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.557699978351593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5454000234603882},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5221999883651733},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5130000114440918},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.45559999346733093},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.423799991607666},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4058000147342682},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C174998907","wikidata":"https://www.wikidata.org/wiki/Q357662","display_name":"Work in process","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3562000095844269},{"id":"https://openalex.org/C76956256","wikidata":"https://www.wikidata.org/wiki/Q27610560","display_name":"Process modeling","level":3,"score":0.32690000534057617},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C187179951","wikidata":"https://www.wikidata.org/wiki/Q7784616","display_name":"Thinking processes","level":3,"score":0.27549999952316284},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i41.40797","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40797","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40797/44758","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i41.40797","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i41.40797","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40797/44758","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138910657.pdf","grobid_xml":"https://content.openalex.org/works/W7138910657.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,188],"Large":[3],"Language":[4],"Models":[5,17],"(LLMs)":[6],"have":[7],"shown":[8],"that":[9,75,111,125,179],"it":[10],"is":[11],"promising":[12],"to":[13,21,57,160],"utilize":[14],"Process":[15],"Reward":[16],"(PRMs)":[18],"as":[19,162],"verifiers":[20],"enhance":[22],"the":[23,49,59,181],"performance":[24],"of":[25,52,62],"LLMs.":[26,189],"However,":[27],"current":[28],"PRMs":[29,130,184],"face":[30],"three":[31],"key":[32],"challenges:":[33],"(1)":[34],"limited":[35],"process":[36,72,94,177],"supervision":[37,95,178],"and":[38,54,97,106,119,147,185],"generalization":[39],"capabilities,":[40],"(2)":[41],"dependence":[42],"on":[43,117,153],"scalar":[44],"value":[45],"prediction":[46],"without":[47],"leveraging":[48],"generative":[50,71],"abilities":[51,159],"LLMs,":[53],"(3)":[55],"inability":[56],"scale":[58],"test-time":[60,140],"compute":[61],"PRMs.":[63],"In":[64],"this":[65],"work,":[66],"we":[67,100],"introduce":[68],"GenPRM,":[69],"a":[70,107,142,148,163,173],"reward":[73],"model":[74,165,168],"performs":[76],"explicit":[77],"Chain-of-Thought":[78],"(CoT)":[79],"reasoning":[80,89,122],"with":[81,131],"code":[82,113],"verification":[83],"before":[84],"providing":[85],"judgment":[86],"for":[87,166,176],"each":[88],"step.":[90],"To":[91],"obtain":[92],"high-quality":[93],"labels":[96],"rationale":[98,108],"data,":[99],"propose":[101],"Relative":[102],"Progress":[103],"Estimation":[104],"(RPE)":[105],"synthesis":[109],"framework":[110],"incorporates":[112],"verification.":[114],"Experimental":[115],"results":[116],"ProcessBench":[118],"several":[120],"mathematical":[121],"tasks":[123],"show":[124],"GenPRM":[126,144,150,156],"significantly":[127],"outperforms":[128,145],"prior":[129],"only":[132],"23K":[133],"training":[134],"data":[135],"from":[136],"MATH":[137],"dataset.":[138],"Through":[139],"scaling,":[141],"1.5B":[143],"GPT-4o,":[146],"7B":[149],"surpasses":[151],"Qwen2.5-Math-PRM-72B":[152],"ProcessBench.":[154],"Additionally,":[155],"demonstrates":[157],"strong":[158],"serve":[161],"critic":[164,186],"policy":[167],"refinement.":[169],"This":[170],"work":[171],"establishes":[172],"new":[174],"paradigm":[175],"bridges":[180],"gap":[182],"between":[183],"models":[187]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
