{"id":"https://openalex.org/W7125501806","doi":"https://doi.org/10.48550/arxiv.2601.15609","title":"When Sharpening Becomes Collapse: Sampling Bias and Semantic Coupling in RL with Verifiable Rewards","display_name":"When Sharpening Becomes Collapse: Sampling Bias and Semantic Coupling in RL with Verifiable Rewards","publication_year":2026,"publication_date":"2026-01-22","ids":{"openalex":"https://openalex.org/W7125501806","doi":"https://doi.org/10.48550/arxiv.2601.15609"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.15609","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15609","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.15609","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123657533","display_name":"Mingyuan Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fan, Mingyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123697390","display_name":"Weiguang Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Weiguang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123676022","display_name":"Daixin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Daixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123669260","display_name":"Cen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Cen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123706860","display_name":"Zhiqiang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhiqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123723705","display_name":"Jun Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jun","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5123657533"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.32589998841285706,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.32589998841285706,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1624000072479248,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.05979999899864197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.7854999899864197},{"id":"https://openalex.org/keywords/sharpening","display_name":"Sharpening","score":0.6607000231742859},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6521999835968018},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5796999931335449},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.5321999788284302},{"id":"https://openalex.org/keywords/coupling","display_name":"Coupling (piping)","score":0.4023999869823456},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.3801000118255615}],"concepts":[{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.7854999899864197},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7838000059127808},{"id":"https://openalex.org/C2781137444","wikidata":"https://www.wikidata.org/wiki/Q237105","display_name":"Sharpening","level":2,"score":0.6607000231742859},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6521999835968018},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5796999931335449},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.5321999788284302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48069998621940613},{"id":"https://openalex.org/C131584629","wikidata":"https://www.wikidata.org/wiki/Q4308705","display_name":"Coupling (piping)","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3840000033378601},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3801000118255615},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37299999594688416},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.3513000011444092},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31139999628067017},{"id":"https://openalex.org/C206654554","wikidata":"https://www.wikidata.org/wiki/Q5374247","display_name":"Empirical measure","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C159023740","wikidata":"https://www.wikidata.org/wiki/Q623276","display_name":"Deadlock","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.2556000053882599}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.15609","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15609","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.15609","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.15609","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"with":[2],"Verifiable":[3],"Rewards":[4],"(RLVR)":[5],"is":[6],"a":[7,50,63,77,104],"central":[8],"paradigm":[9],"for":[10],"turning":[11],"large":[12],"language":[13],"models":[14],"(LLMs)":[15],"into":[16],"reliable":[17],"problem":[18],"solvers,":[19],"especially":[20],"in":[21],"logic-heavy":[22],"domains.":[23],"Despite":[24],"its":[25],"empirical":[26],"success,":[27],"it":[28],"remains":[29],"unclear":[30],"whether":[31],"RLVR":[32],"elicits":[33],"novel":[34],"capabilities":[35],"or":[36],"merely":[37],"sharpens":[38],"the":[39,53],"distribution":[40],"over":[41],"existing":[42],"knowledge.":[43],"We":[44],"study":[45],"this":[46],"by":[47],"formalizing":[48],"over-sharpening,":[49],"phenomenon":[51],"where":[52],"policy":[54],"collapses":[55],"onto":[56],"limited":[57],"modes,":[58,75],"suppressing":[59],"valid":[60],"alternatives.":[61],"At":[62],"high":[64],"level,":[65],"we":[66,88],"discover":[67],"finite-batch":[68],"updates":[69],"intrinsically":[70],"bias":[71],"learning":[72],"toward":[73],"sampled":[74],"triggering":[76],"collapse":[78],"that":[79,110],"propagates":[80],"globally":[81],"via":[82,103],"semantic":[83],"coupling.":[84],"To":[85],"mitigate":[86],"this,":[87],"propose":[89],"inverse-success":[90],"advantage":[91],"calibration":[92,99],"to":[93,100],"prioritize":[94],"difficult":[95],"queries":[96],"and":[97],"distribution-level":[98],"diversify":[101],"sampling":[102],"memory":[105],"network.":[106],"Empirical":[107],"evaluations":[108],"validate":[109],"our":[111],"strategies":[112],"can":[113],"effectively":[114],"improve":[115],"generalization.":[116]},"counts_by_year":[],"updated_date":"2026-01-24T23:27:35.965710","created_date":"2026-01-24T00:00:00"}
