{"id":"https://openalex.org/W7161724498","doi":"https://doi.org/10.48550/arxiv.2605.17333","title":"Leveraging Error Diversity in Group Rollouts for Reinforcement Learning","display_name":"Leveraging Error Diversity in Group Rollouts for Reinforcement Learning","publication_year":2026,"publication_date":"2026-05-17","ids":{"openalex":"https://openalex.org/W7161724498","doi":"https://doi.org/10.48550/arxiv.2605.17333"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17333","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17333","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17333","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136479148","display_name":"Wenpu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Wenpu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111103473","display_name":"Yuqi Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yuqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136355786","display_name":"Weichu Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Weichu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136482371","display_name":"Yongfu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yongfu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136458617","display_name":"Shuai Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Shuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136460514","display_name":"Ziyue Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ziyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136499313","display_name":"Wenqi Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Wenqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136484208","display_name":"Xiaoying Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaoying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136481577","display_name":"Tong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Tong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136488634","display_name":"Nan Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Nan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136488579","display_name":"Jiaqi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7827000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7827000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.021700000390410423,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.016699999570846558,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edas","display_name":"EDAS","score":0.8452000021934509},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5781000256538391},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.5378999710083008},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.3517000079154968},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.34060001373291016},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.3294000029563904},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.3249000012874603},{"id":"https://openalex.org/keywords/process-capability-index","display_name":"Process capability index","score":0.29339998960494995}],"concepts":[{"id":"https://openalex.org/C49284225","wikidata":"https://www.wikidata.org/wiki/Q5322829","display_name":"EDAS","level":3,"score":0.8452000021934509},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6863999962806702},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5781000256538391},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.5378999710083008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5328999757766724},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5163000226020813},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.34060001373291016},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C190190378","wikidata":"https://www.wikidata.org/wiki/Q1192625","display_name":"Process capability index","level":3,"score":0.29339998960494995},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C94822996","wikidata":"https://www.wikidata.org/wiki/Q1777902","display_name":"Satisficing","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C85973986","wikidata":"https://www.wikidata.org/wiki/Q1091731","display_name":"Exploratory research","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C97399411","wikidata":"https://www.wikidata.org/wiki/Q825367","display_name":"Coin flipping","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25699999928474426},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17333","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17333","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17333","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17333","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"from":[2,68],"Verifiable":[3],"Rewards":[4],"(RLVR)":[5],"typically":[6],"samples":[7],"multiple":[8],"responses":[9],"per":[10],"prompt":[11],"and":[12,110,126,162],"assigns":[13],"binary":[14],"rewards":[15],"based":[16,98],"on":[17,99,150,181],"individual":[18],"correctness,":[19],"yet":[20],"the":[21,25,29,92,119,189],"collective":[22],"structure":[23],"of":[24,31,56,152,160,176],"group":[26,51,193],"output,":[27],"specifically":[28],"distribution":[30],"errors,":[32],"is":[33,52,195],"largely":[34],"discarded.":[35],"We":[36,147],"identify":[37],"this":[38,77],"as":[39,133],"a":[40,50,53,86,134,158,196],"missed":[41],"opportunity:":[42],"empirical":[43],"analysis":[44],"reveals":[45],"that":[46,90,138,187],"error":[47,101,128],"diversity":[48],"within":[49],"strong":[54],"predictor":[55],"training":[57],"success,":[58],"with":[59],"problems":[60],"eliciting":[61],"diverse":[62,123],"wrong":[63],"answers":[64],"benefiting":[65],"substantially":[66],"more":[67],"RLVR":[69,145,155],"than":[70],"those":[71],"producing":[72],"homogeneous":[73],"failures.":[74],"Motivated":[75],"by":[76],"observation,":[78],"we":[79],"propose":[80],"Error":[81],"Diversity":[82],"Advantage":[83],"Shaping":[84],"(EDAS),":[85],"lightweight,":[87],"algorithm-agnostic":[88],"technique":[89],"modulates":[91],"advantage":[93],"signal":[94],"for":[95,106,113,200],"incorrect":[96],"rollouts":[97,194],"intra-group":[100],"diversity.":[102],"EDAS":[103,131,149,171],"amplifies":[104],"penalties":[105,112],"dominant,":[107],"repeated":[108],"errors":[109],"attenuates":[111],"rare,":[114],"exploratory":[115],"ones,":[116],"thereby":[117],"encouraging":[118],"model":[120],"to":[121],"maintain":[122],"reasoning":[124],"paths":[125],"discouraging":[127],"perseveration.":[129],"Crucially,":[130],"operates":[132],"simple":[135],"post-hoc":[136],"adjustment":[137],"can":[139],"be":[140],"seamlessly":[141],"integrated":[142],"into":[143],"any":[144],"algorithm.":[146],"validate":[148],"top":[151],"several":[153],"mainstream":[154],"methods":[156],"across":[157,183],"series":[159],"models":[161],"seven":[163,184],"challenging":[164],"math":[165],"benchmarks,":[166,185],"demonstrating":[167],"consistent":[168],"improvements.":[169],"Notably,":[170],"yields":[172],"an":[173],"average":[174],"improvement":[175],"6.29":[177],"points":[178],"over":[179],"DAPO":[180],"Qwen3-8B":[182],"confirming":[186],"exploiting":[188],"latent":[190],"information":[191],"in":[192],"broadly":[197],"effective":[198],"strategy":[199],"strengthening":[201],"RLVR.":[202]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
