{"id":"https://openalex.org/W4416036184","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.754","title":"Reward Model Perspectives: Whose Opinions Do Reward Models Reward?","display_name":"Reward Model Perspectives: Whose Opinions Do Reward Models Reward?","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036184","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.754"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.754","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.754","pdf_url":"https://aclanthology.org/2025.emnlp-main.754.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.754.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120049230","display_name":"Elle","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Elle","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5120049230"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40250474,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14931","last_page":"14955"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10467","display_name":"Psychometric Methodologies and Testing","score":0.07039999961853027,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10467","display_name":"Psychometric Methodologies and Testing","score":0.07039999961853027,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11084","display_name":"Workplace Health and Well-being","score":0.024299999698996544,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11994","display_name":"Human Resource Development and Performance Evaluation","score":0.023800000548362732,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.2921000123023987},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.24169999361038208},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.24060000479221344},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.23690000176429749}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.44859999418258667},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36000001430511475},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3409999907016754},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.3077000081539154},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25870001316070557},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.24169999361038208},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.24060000479221344},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.23690000176429749},{"id":"https://openalex.org/C143661069","wikidata":"https://www.wikidata.org/wiki/Q670713","display_name":"Reward system","level":2,"score":0.23309999704360962}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.754","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.754","pdf_url":"https://aclanthology.org/2025.emnlp-main.754.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.754","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.754","pdf_url":"https://aclanthology.org/2025.emnlp-main.754.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036184.pdf","grobid_xml":"https://content.openalex.org/works/W4416036184.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reward":[0],"models":[1,10],"(RMs)":[2],"are":[3,101],"central":[4],"to":[5,21,51,64,86,120,141],"the":[6,40,49,60,68,75,126,143,150],"alignment":[7,41,137],"of":[8,28,42,62,70,92,132,145],"language":[9,151],"(LMs).An":[11],"RM":[12,29,88,133],"often":[13],"serves":[14],"as":[15],"a":[16,36,71],"proxy":[17],"for":[18,38,128],"human":[19],"preferences":[20,69],"guide":[22],"downstream":[23],"LM":[24],"behavior.However,":[25],"our":[26],"understanding":[27],"behavior":[30,134],"is":[31,117],"limited.Our":[32],"work":[33],"(i)":[34],"formalizes":[35],"framework":[37],"measuring":[39],"opinions":[43],"captured":[44],"by":[45],"RMs,":[46],"(ii)":[47],"investigates":[48],"extent":[50],"which":[52,83],"RMs":[53,100],"demonstrate":[54],"sociodemographic":[55],"biases,":[56],"and":[57,77,96,108,114],"(iii)":[58],"explores":[59],"effects":[61],"prompting":[63],"steer":[65],"rewards":[66],"towards":[67],"target":[72],"group.We":[73],"study":[74],"subjective":[76],"diverse":[78],"perspectives":[79,89],"on":[80],"controversial":[81],"topics,":[82],"allows":[84],"us":[85],"quantify":[87],"in":[90,135,149],"terms":[91],"their":[93],"opinions,":[94],"attitudes,":[95],"values.We":[97],"show":[98],"that":[99,153],"poorly":[102],"aligned":[103],"with":[104],"several":[105],"demographic":[106],"groups":[107],"can":[109],"systematically":[110],"reward":[111],"harmful":[112],"stereotypes,":[113],"steering":[115],"alone":[116],"not":[118],"enough":[119],"overcome":[121],"these":[122],"limitations.Our":[123],"findings":[124],"underscore":[125],"need":[127],"more":[129],"careful":[130],"consideration":[131],"model":[136],"during":[138],"preference":[139],"learning":[140],"prevent":[142],"propagation":[144],"unwanted":[146],"social":[147],"biases":[148],"technologies":[152],"we":[154],"use.":[155]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-11-08T00:00:00"}
