{"id":"https://openalex.org/W7138864514","doi":"https://doi.org/10.1609/aaai.v40i39.40600","title":"Improving Value-based Process Verifier via Low-Cost Variance Reduction","display_name":"Improving Value-based Process Verifier via Low-Cost Variance Reduction","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138864514","doi":"https://doi.org/10.1609/aaai.v40i39.40600"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i39.40600","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40600","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40600/44561","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40600/44561","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zetian Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zetian Sun","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dongfang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongfang Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Baotian Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baotian Hu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Min Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74267448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"39","first_page":"33162","last_page":"33170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.16580000519752502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.16580000519752502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.11720000207424164,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10019999742507935,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.7232000231742859},{"id":"https://openalex.org/keywords/variance-reduction","display_name":"Variance reduction","score":0.6355000138282776},{"id":"https://openalex.org/keywords/control-variates","display_name":"Control variates","score":0.5418000221252441},{"id":"https://openalex.org/keywords/bias-of-an-estimator","display_name":"Bias of an estimator","score":0.5375000238418579},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5175999999046326},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.5113000273704529},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.47760000824928284},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4708999991416931}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7232000231742859},{"id":"https://openalex.org/C62644790","wikidata":"https://www.wikidata.org/wiki/Q3454689","display_name":"Variance reduction","level":3,"score":0.6355000138282776},{"id":"https://openalex.org/C121683094","wikidata":"https://www.wikidata.org/wiki/Q3554721","display_name":"Control variates","level":5,"score":0.5418000221252441},{"id":"https://openalex.org/C191393472","wikidata":"https://www.wikidata.org/wiki/Q15222032","display_name":"Bias of an estimator","level":4,"score":0.5375000238418579},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5175999999046326},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.5113000273704529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.499099999666214},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.47760000824928284},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4708999991416931},{"id":"https://openalex.org/C165646398","wikidata":"https://www.wikidata.org/wiki/Q3755281","display_name":"Minimum-variance unbiased estimator","level":3,"score":0.460099995136261},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.44119998812675476},{"id":"https://openalex.org/C134962040","wikidata":"https://www.wikidata.org/wiki/Q7606742","display_name":"Stein's unbiased risk estimate","level":5,"score":0.43230000138282776},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.43059998750686646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.424699991941452},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4074999988079071},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3612000048160553},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3540000021457672},{"id":"https://openalex.org/C35594927","wikidata":"https://www.wikidata.org/wiki/Q2265984","display_name":"Efficient estimator","level":4,"score":0.3522000014781952},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.32280001044273376},{"id":"https://openalex.org/C164172150","wikidata":"https://www.wikidata.org/wiki/Q1782585","display_name":"Consistent estimator","level":4,"score":0.2574000060558319}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i39.40600","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40600","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40600/44561","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i39.40600","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40600","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40600/44561","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G472678898","display_name":null,"funder_award_id":"2025A","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G4731818174","display_name":null,"funder_award_id":"51501137","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138864514.pdf","grobid_xml":"https://content.openalex.org/works/W7138864514.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,19,61,154],"a":[9,25,36,42,46,65,107,151],"wide":[10],"range":[11],"of":[12,35,67,71,82,181],"tasks.":[13],"However,":[14],"their":[15,53,62],"reasoning":[16,38],"capabilities,":[17],"particularly":[18],"complex":[20],"domains":[21],"like":[22],"mathematics,":[23],"remain":[24],"significant":[26],"challenge.":[27],"Value-based":[28],"process":[29],"verifiers,":[30],"which":[31,126],"estimate":[32],"the":[33,68,79,91,103,115,119,134,138,172,179,193],"probability":[34],"partial":[37],"chain":[39],"leading":[40],"to":[41,78,150,177],"correct":[43],"solution,":[44],"are":[45],"promising":[47],"approach":[48],"for":[49],"improving":[50],"reasoning.":[51],"Nevertheless,":[52],"effectiveness":[54,180],"is":[55,106],"often":[56],"hindered":[57],"by":[58,131,190,196],"estimation":[59,92,160],"error":[60,93],"training":[63],"annotations,":[64],"consequence":[66],"limited":[69],"number":[70],"Monte":[72,121],"Carlo":[73,122],"(MC)":[74],"samples":[75],"feasible":[76],"due":[77],"high":[80,97],"cost":[81],"LLM":[83,163],"inference.":[84],"In":[85],"this":[86],"paper,":[87],"we":[88,117,144],"identify":[89],"that":[90,146],"primarily":[94],"arises":[95],"from":[96,137],"variance":[98],"rather":[99],"than":[100],"bias,":[101],"and":[102,140,174],"MC":[104,135],"estimator":[105,130],"Minimum":[108],"Variance":[109],"Unbiased":[110],"Estimator":[111],"(MVUE).":[112],"To":[113],"address":[114],"problem,":[116],"propose":[118],"Compound":[120],"Sampling":[123],"(ComMCS)":[124],"method,":[125],"constructs":[127],"an":[128,158],"unbiased":[129,159],"linearly":[132],"combining":[133],"estimators":[136],"current":[139],"subsequent":[141],"steps.":[142],"Theoretically,":[143],"show":[145],"our":[147,182],"method":[148,189],"leads":[149],"predictable":[152],"reduction":[153],"variance,":[155],"while":[156],"maintaining":[157],"without":[161],"additional":[162],"inference":[164],"cost.":[165],"We":[166],"also":[167],"perform":[168],"empirical":[169],"experiments":[170],"on":[171,199,201],"MATH-500":[173,200],"GSM8K":[175],"benchmarks":[176],"demonstrate":[178],"method.":[183],"Notably,":[184],"ComMCS":[185],"outperforms":[186],"regression-based":[187],"optimization":[188],"2.8":[191],"points,":[192],"non-variance-reduced":[194],"baseline":[195],"2.2":[197],"points":[198],"Best-of-32":[202],"sampling":[203],"experiment.":[204]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-02-02T00:00:00"}
