{"id":"https://openalex.org/W7133522813","doi":"https://doi.org/10.48550/arxiv.2603.02479","title":"PRISM: Pushing the Frontier of Deep Think via Process Reward Model-Guided Inference","display_name":"PRISM: Pushing the Frontier of Deep Think via Process Reward Model-Guided Inference","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133522813","doi":"https://doi.org/10.48550/arxiv.2603.02479"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02479","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02479","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02479","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128077487","display_name":"Rituraj Sharma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sharma, Rituraj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128039729","display_name":"Weiyuan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Weiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Provenzano, Noah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Provenzano, Noah","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128041333","display_name":"Tu Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Tu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.18379999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.06920000165700912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6735000014305115},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6040999889373779},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.574400007724762},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5246000289916992},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5001999735832214},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.499099999666214},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.37139999866485596},{"id":"https://openalex.org/keywords/axiom","display_name":"Axiom","score":0.365200012922287}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6735000014305115},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6040999889373779},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6011999845504761},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.574400007724762},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5246000289916992},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.499099999666214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4401000142097473},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3781000077724457},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C167729594","wikidata":"https://www.wikidata.org/wiki/Q17736","display_name":"Axiom","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.30869999527931213},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30079999566078186},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C162376815","wikidata":"https://www.wikidata.org/wiki/Q2158281","display_name":"Frequentist inference","level":4,"score":0.2768000066280365},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02479","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02479","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02479","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02479","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"DEEPTHINK":[0,63,132],"methods":[1],"improve":[2],"reasoning":[3,116],"by":[4],"generating,":[5],"refining,":[6],"and":[7,20,47,65,84,100,107,122,137,144,175],"aggregating":[8],"populations":[9],"of":[10,62],"candidate":[11,91],"solutions,":[12,46],"which":[13,33,110],"enables":[14],"strong":[15],"performance":[16],"on":[17,114,141,178],"complex":[18],"mathematical":[19],"scientific":[21],"tasks.":[22],"However,":[23],"existing":[24,131],"frameworks":[25],"often":[26,176],"lack":[27],"reliable":[28,166],"correctness":[29],"signals":[30],"during":[31,163],"inference,":[32],"creates":[34],"a":[35,59,68,96],"population-enhancement":[36],"bottleneck":[37],"where":[38],"deeper":[39],"deliberation":[40],"amplifies":[41],"errors,":[42],"suppresses":[43],"correct":[44,173],"minority":[45],"yields":[48],"weak":[49],"returns":[50],"to":[51,79],"additional":[52],"compute.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57],"introduce":[58],"functional":[60],"decomposition":[61],"systems":[64],"propose":[66],"PRISM,":[67],"Process":[69],"Reward":[70],"Model":[71],"(PRM)-guided":[72],"inference":[73],"algorithm":[74],"that":[75,157],"uses":[76],"step-level":[77],"verification":[78],"guide":[80],"both":[81],"population":[82,103,170],"refinement":[83],"solution":[85],"aggregation.":[86],"During":[87],"refinement,":[88,109,164],"PRISM":[89,125,158],"treats":[90],"solutions":[92],"as":[93],"particles":[94],"in":[95],"PRM-defined":[97],"energy":[98],"landscape":[99],"reshapes":[101],"the":[102,168,179],"through":[104],"score-guided":[105],"resampling":[106],"stochastic":[108],"concentrates":[111],"probability":[112],"mass":[113],"higher-quality":[115],"while":[117,148],"preserving":[118],"diversity.":[119],"Across":[120],"mathematics":[121],"science":[123],"benchmarks,":[124],"is":[126],"competitive":[127],"with":[128,139],"or":[129,150],"outperforms":[130],"methods,":[133],"reaching":[134],"90.0%,":[135],"75.4%,":[136],"71.4%":[138],"gpt-oss-20b":[140],"AIME25,":[142],"HMMT25,":[143],"GPQA":[145],"Diamond,":[146],"respectively,":[147],"matching":[149],"exceeding":[151],"gpt-oss-120b.":[152],"Additionally,":[153],"our":[154],"analysis":[155],"shows":[156],"produces":[159],"consistent":[160],"net-directional":[161],"correction":[162],"remains":[165],"when":[167],"initial":[169],"contains":[171],"few":[172],"candidates,":[174],"lies":[177],"compute-accuracy":[180],"Pareto":[181],"frontier.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-05T00:00:00"}
