{"id":"https://openalex.org/W7152537877","doi":"https://doi.org/10.48550/arxiv.2604.07343","title":"Personalized RewardBench: Evaluating Reward Models with Human Aligned Personalization","display_name":"Personalized RewardBench: Evaluating Reward Models with Human Aligned Personalization","publication_year":2026,"publication_date":"2026-04-08","ids":{"openalex":"https://openalex.org/W7152537877","doi":"https://doi.org/10.48550/arxiv.2604.07343"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.07343","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07343","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.07343","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133278791","display_name":"Qiyao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ma, Qiyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018828704","display_name":"Dechen Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Dechen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133303230","display_name":"Rui Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133310780","display_name":"Boqi Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Boqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042064864","display_name":"Hanchu Zhou","orcid":"https://orcid.org/0000-0002-6305-7907"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Hanchu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133312156","display_name":"Junshan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Junshan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5055811120","display_name":"Zhe Zhao","orcid":"https://orcid.org/0009-0008-9496-5917"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5133278791"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.7354999780654907,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.7354999780654907,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.0421999990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03060000017285347,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.6427000164985657},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6305000185966492},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6189000010490417},{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.47279998660087585},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.47189998626708984},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4578999876976013},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.45320001244544983},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.36719998717308044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7215999960899353},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.6427000164985657},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6305000185966492},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6189000010490417},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5389999747276306},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.492900013923645},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.47279998660087585},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.47189998626708984},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4578999876976013},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.45320001244544983},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2946999967098236},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.27869999408721924},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C191172861","wikidata":"https://www.wikidata.org/wiki/Q7899321","display_name":"Upstream (networking)","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.07343","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07343","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.07343","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07343","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7458155155181885}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Pluralistic":[0],"alignment":[1],"has":[2],"emerged":[3],"as":[4,21,198],"a":[5,22,60,157,172,199],"critical":[6],"frontier":[7],"in":[8,179,209],"the":[9,99,107],"development":[10],"of":[11,145],"Large":[12],"Language":[13],"Models":[14],"(LLMs),":[15],"with":[16,117,139,176],"reward":[17,41,67,135,152,158,206],"models":[18,42,136],"(RMs)":[19],"serving":[20],"central":[23],"mechanism":[24],"for":[25,32,44,204],"capturing":[26],"diverse":[27],"human":[28,103],"values.":[29],"While":[30],"benchmarks":[31],"general":[33,122],"response":[34,79],"quality":[35,123],"are":[36,95],"prevalent,":[37],"evaluating":[38,205],"how":[39],"well":[40],"account":[43],"individual":[45],"user":[46],"preferences":[47],"remains":[48],"an":[49,143,150],"open":[50],"challenge.":[51],"To":[52],"bridge":[53],"this":[54],"gap,":[55],"we":[56,164],"introduce":[57],"Personalized":[58,196],"RewardBench,":[59],"novel":[61],"benchmark":[62,154,170],"designed":[63],"to":[64,70,85,98,190],"rigorously":[65],"assess":[66],"models'":[68,207],"capacity":[69],"model":[71,153],"personalized":[72],"preferences.":[73],"We":[74],"construct":[75],"chosen":[76],"and":[77,127,184,201],"rejected":[78],"pairs":[80,112],"based":[81],"on":[82,161],"strict":[83],"adherence":[84],"(or":[86],"violation":[87],"of)":[88],"user-specific":[89],"rubrics,":[90],"ensuring":[91],"that":[92,106,132,168],"preference":[93],"distinctions":[94],"uniquely":[96],"tailored":[97],"individual.":[100],"In":[101],"particular,":[102],"evaluations":[104],"confirm":[105],"primary":[108],"discriminative":[109],"factor":[110],"between":[111],"is":[113],"strictly":[114],"personal":[115],"preference,":[116],"both":[118,180],"responses":[119],"maintaining":[120],"high":[121],"(e.g.,":[124],"correctness,":[125],"relevance":[126],"helpfulness).":[128],"Extensive":[129],"testing":[130],"reveals":[131],"existing":[133,191],"state-of-the-art":[134],"struggle":[137],"significantly":[138,173],"personalization,":[140],"peaking":[141],"at":[142],"accuracy":[144],"just":[146],"75.94%.":[147],"Crucially,":[148],"because":[149],"effective":[151],"should":[155],"predict":[156],"model's":[159],"performance":[160,178,208],"downstream":[162,177,210],"tasks,":[163],"conduct":[165],"experiments":[166],"demonstrating":[167],"our":[169],"exhibits":[171],"higher":[174],"correlation":[175],"Best-of-N":[181],"(BoN)":[182],"sampling":[183],"Proximal":[185],"Policy":[186],"Optimization":[187],"(PPO)":[188],"compared":[189],"baselines.":[192],"These":[193],"findings":[194],"establish":[195],"RewardBench":[197],"robust":[200],"accurate":[202],"proxy":[203],"applications.":[211]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-10T00:00:00"}
