{"id":"https://openalex.org/W7135050512","doi":"https://doi.org/10.48550/arxiv.2603.10588","title":"Does LLM Alignment Really Need Diversity? An Empirical Study of Adapting RLVR Methods for Moral Reasoning","display_name":"Does LLM Alignment Really Need Diversity? An Empirical Study of Adapting RLVR Methods for Moral Reasoning","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135050512","doi":"https://doi.org/10.48550/arxiv.2603.10588"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10588","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10588","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128853639","display_name":"Zhaowei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Zhaowei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Liu, Xiaohan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiaohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128916831","display_name":"Xuekai Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xuekai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090187041","display_name":"Junchao Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Junchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036980687","display_name":"Ceyao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ceyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128837271","display_name":"Zhiyuan Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128824940","display_name":"Yaodong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yaodong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128890910","display_name":"Xiaoyuan Yi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi, Xiaoyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128920399","display_name":"Xing Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Xing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5128853639"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15790000557899475,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15790000557899475,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12460000067949295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.12349999696016312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.48489999771118164},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.47780001163482666},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4523000121116638},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.37940001487731934},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.37549999356269836},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.36550000309944153},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.33640000224113464},{"id":"https://openalex.org/keywords/empirical-evidence","display_name":"Empirical evidence","score":0.3357999920845032},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.3271999955177307}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6190999746322632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5023999810218811},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.48489999771118164},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.47780001163482666},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4523000121116638},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.37549999356269836},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2831999957561493},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.2793999910354614},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.27619999647140503},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C21847791","wikidata":"https://www.wikidata.org/wiki/Q191081","display_name":"Logical conjunction","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C9158031","wikidata":"https://www.wikidata.org/wiki/Q6909140","display_name":"Moral reasoning","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10588","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10588","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10588","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"with":[2],"verifiable":[3],"rewards":[4],"(RLVR)":[5],"has":[6],"achieved":[7],"remarkable":[8],"success":[9],"in":[10,35],"logical":[11],"reasoning":[12,119,173],"tasks,":[13],"yet":[14],"whether":[15],"large":[16],"language":[17],"model":[18],"(LLM)":[19],"alignment":[20,43,104,149,155],"requires":[21],"fundamentally":[22],"different":[23],"approaches":[24,92],"remains":[25],"unclear.":[26],"Given":[27],"the":[28,57],"apparent":[29],"tolerance":[30],"for":[31,148],"multiple":[32],"valid":[33],"responses":[34,111],"moral":[36,118,172],"reasoning,":[37,127],"a":[38,74,80],"natural":[39],"hypothesis":[40],"is":[41],"that":[42,90,117,154],"tasks":[44,156],"inherently":[45,159],"require":[46,160],"diversity-seeking":[47],"distribution-matching":[48,91],"algorithms":[49],"rather":[50],"than":[51,125],"reward-maximizing":[52,99,165],"policy-based":[53],"methods.":[54],"We":[55],"conduct":[56],"first":[58],"comprehensive":[59],"empirical":[60],"study":[61],"comparing":[62],"both":[63],"paradigms":[64],"on":[65,103],"MoReBench.":[66],"To":[67],"enable":[68],"stable":[69],"RLVR":[70,166],"training,":[71],"we":[72,88,115],"build":[73],"rubric-grounded":[75],"reward":[76],"pipeline":[77],"by":[78],"training":[79],"Qwen3-1.7B":[81],"judge":[82],"model.":[83],"Contrary":[84],"to":[85,112,171],"our":[86],"hypothesis,":[87],"find":[89],"do":[93,157],"not":[94,158],"demonstrate":[95,116],"significant":[96],"advantages":[97],"over":[98],"methods":[100,167],"as":[101],"expected":[102],"tasks.":[105,150],"Through":[106],"semantic":[107,113],"visualization":[108],"mapping":[109],"high-reward":[110,123],"space,":[114],"exhibits":[120],"more":[121,146],"concentrated":[122],"distributions":[124],"mathematical":[126],"where":[128],"diverse":[129],"solution":[130],"strategies":[131],"yield":[132],"similarly":[133],"high":[134],"rewards.":[135],"This":[136],"counter-intuitive":[137],"finding":[138],"explains":[139],"why":[140],"mode-seeking":[141],"optimization":[142],"proves":[143],"equally":[144],"or":[145],"effective":[147],"Our":[151],"results":[152],"suggest":[153],"diversity-preserving":[161],"algorithms,":[162],"and":[163],"standard":[164],"can":[168],"effectively":[169],"transfer":[170],"without":[174],"explicit":[175],"diversity":[176],"mechanisms.":[177]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-03-13T00:00:00"}
