{"id":"https://openalex.org/W7134829930","doi":"https://doi.org/10.48550/arxiv.2603.08095","title":"DC-W2S: Dual-Consensus Weak-to-Strong Training for Reliable Process Reward Modeling in Biological Reasoning","display_name":"DC-W2S: Dual-Consensus Weak-to-Strong Training for Reliable Process Reward Modeling in Biological Reasoning","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134829930","doi":"https://doi.org/10.48550/arxiv.2603.08095"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08095","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128657411","display_name":"Chi-Min Chan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chan, Chi-Min","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057632577","display_name":"Ehsan Hajiramezanali","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hajiramezanali, Ehsan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086921718","display_name":"Xiner Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiner","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013970475","display_name":"Edward De Brouwer","orcid":"https://orcid.org/0000-0003-0608-0155"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Brouwer, Edward","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128679690","display_name":"Carl Edwards","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edwards, Carl","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128680978","display_name":"Wei Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128689573","display_name":"Sirui Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Sirui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128651766","display_name":"Yike Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yike","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020099374","display_name":"Gabriele Scalia","orcid":"https://orcid.org/0000-0003-3305-9220"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Scalia, Gabriele","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.29910001158714294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.29910001158714294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.06210000067949295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.04520000144839287,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.659500002861023},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5867000222206116},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5299000144004822},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5091000199317932},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4726000130176544},{"id":"https://openalex.org/keywords/outcome","display_name":"Outcome (game theory)","score":0.4717000126838684},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.46950000524520874},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.37310001254081726},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.3686000108718872}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7401999831199646},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.659500002861023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5931000113487244},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5867000222206116},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5299000144004822},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4959000051021576},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4726000130176544},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.4717000126838684},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.46950000524520874},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.37310001254081726},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35519999265670776},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.33880001306533813},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3075999915599823},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C76956256","wikidata":"https://www.wikidata.org/wiki/Q27610560","display_name":"Process modeling","level":3,"score":0.29829999804496765},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2535000145435333}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08095","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08095","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08095","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08095","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.5102237462997437,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"},{"score":0.4064020812511444,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"scientific":[1],"reasoning":[2,8,148],"tasks,":[3],"the":[4,7,14,26,41,52,89,107,133,141],"veracity":[5],"of":[6,44,54,123,143],"process":[9],"is":[10,38,158],"as":[11,13],"critical":[12],"final":[15],"outcome.":[16],"While":[17],"Process":[18],"Reward":[19,33],"Models":[20,34],"(PRMs)":[21],"offer":[22],"a":[23,121],"solution":[24],"to":[25,131],"coarse-grained":[27],"supervision":[28,112],"problems":[29],"inherent":[30],"in":[31,106],"Outcome":[32],"(ORMs),":[35],"their":[36],"deployment":[37],"hindered":[39],"by":[40],"prohibitive":[42],"cost":[43],"obtaining":[45],"expert-verified":[46],"step-wise":[47],"labels.":[48],"This":[49],"paper":[50],"addresses":[51],"challenge":[53],"training":[55,78,134,142,163],"reliable":[56],"PRMs":[57,145],"using":[58],"abundant":[59],"but":[60],"noisy":[61,81,166],"\"weak\"":[62],"supervision.":[63],"We":[64,118,136],"argue":[65],"that":[66,138,154],"existing":[67],"Weak-to-Strong":[68,91],"Generalization":[69],"(W2SG)":[70],"theories":[71],"lack":[72],"prescriptive":[73],"guidelines":[74],"for":[75,146],"selecting":[76],"high-quality":[77],"signals":[79,113],"from":[80],"data.":[82],"To":[83],"bridge":[84],"this":[85],"gap,":[86],"we":[87,110],"introduce":[88],"Dual-Consensus":[90],"(DC-W2S)":[92],"framework.":[93],"By":[94],"intersecting":[95],"Self-Consensus":[96],"(SC)":[97],"metrics":[98,105],"among":[99],"weak":[100],"supervisors":[101],"with":[102],"Neighborhood-Consensus":[103],"(NC)":[104],"embedding":[108],"space,":[109],"stratify":[111],"into":[114],"distinct":[115],"reliability":[116],"regimes.":[117],"then":[119],"employ":[120],"curriculum":[122],"instance-level":[124],"balanced":[125],"sampling":[126],"and":[127],"label-level":[128],"reliability-aware":[129],"masking":[130],"guide":[132],"process.":[135],"demonstrate":[137],"DC-W2S":[139],"enables":[140],"robust":[144],"complex":[147],"without":[149],"exhaustive":[150],"expert":[151],"annotation,":[152],"proving":[153],"strategic":[155],"data":[156],"curation":[157],"more":[159],"effective":[160],"than":[161],"indiscriminate":[162],"on":[164],"large-scale":[165],"datasets.":[167]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
