{"id":"https://openalex.org/W4404782113","doi":"https://doi.org/10.18653/v1/2024.findings-emnlp.234","title":"Reward Modeling Requires Automatic Adjustment Based on Data Quality","display_name":"Reward Modeling Requires Automatic Adjustment Based on Data Quality","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4404782113","doi":"https://doi.org/10.18653/v1/2024.findings-emnlp.234"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.findings-emnlp.234","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.findings-emnlp.234","pdf_url":"https://aclanthology.org/2024.findings-emnlp.234.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-emnlp.234.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112223450","display_name":"Binghai Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Binghai Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055879965","display_name":"Rui Zheng","orcid":"https://orcid.org/0000-0002-4225-7133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rui Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432094","display_name":"Lu Chen","orcid":"https://orcid.org/0000-0002-5701-9306"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036875405","display_name":"Zhiheng Xi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiheng Xi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017604405","display_name":"Wei Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053105889","display_name":"Yuhao Zhou","orcid":"https://orcid.org/0000-0001-8074-6416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuhao Zhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037756245","display_name":"Yan Dong","orcid":"https://orcid.org/0000-0002-4461-830X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058353652","display_name":"Tao Gui","orcid":"https://orcid.org/0000-0002-6154-0751"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Gui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100360194","display_name":"Qi Zhang","orcid":"https://orcid.org/0000-0001-5303-9804"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088834359","display_name":"Xuanjing Huang","orcid":"https://orcid.org/0000-0001-9197-9426"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuanjing Huang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5112223450"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9439,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.79658344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4041","last_page":"4064"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.3073999881744385,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.3073999881744385,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13359","display_name":"Sports Science and Education","score":0.273499995470047,"subfield":{"id":"https://openalex.org/subfields/1207","display_name":"History and Philosophy of Science"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7042791247367859},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5358608365058899},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.5180075764656067},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13642266392707825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7042791247367859},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5358608365058899},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.5180075764656067},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13642266392707825},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-emnlp.234","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.findings-emnlp.234","pdf_url":"https://aclanthology.org/2024.findings-emnlp.234.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-emnlp.234","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.findings-emnlp.234","pdf_url":"https://aclanthology.org/2024.findings-emnlp.234.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2504063345","display_name":null,"funder_award_id":"6244160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327803","display_name":"Shanghai Rising-Star Program","ror":null},{"id":"https://openalex.org/F4320335796","display_name":"Program of Shanghai Academic Research Leader","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404782113.pdf","grobid_xml":"https://content.openalex.org/works/W4404782113.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"In":[0],"Reinforcement":[1],"Learning":[2],"from":[3,82],"Human":[4],"Feedback":[5],"(RLHF),":[6],"the":[7,28,51,56,69,87,96,101,130,156],"reward":[8,29,79,97,114,123,150],"model":[9,17,30,80,98,115,151],"plays":[10],"a":[11,33,36,118],"crucial":[12],"role":[13],"in":[14,68,78,89,113],"aligning":[15],"language":[16],"outputs":[18],"with":[19,39,46],"human":[20,22,47,70,83,142],"values.The":[21],"preference":[23,71,143],"data":[24,106,127],"used":[25],"to":[26,50,74,92],"train":[27],"consists":[31],"of":[32,55,103,107,132,138,159],"prompt":[34],"and":[35,53,63,76,105,134,153],"response":[37,43,93],"pair,":[38],"humans":[40],"annotating":[41],"which":[42],"better":[44],"aligns":[45],"value":[48],"preferences.Due":[49],"complexity":[52],"subjectivity":[54],"annotation":[57],"task,":[58],"multiple":[59,141],"organizations":[60],"including":[61],"OpenAI":[62],"Anthropic":[64],"report":[65],"significant":[66,111],"noise":[67,133],"datasets,":[72],"leading":[73],"instability":[75],"deviation":[77],"training":[81,152],"values.We":[84],"discover":[85],"that":[86,120,146],"difference":[88],"scores":[90],"assigned":[91],"pairs":[94],"by":[95],"effectively":[99],"indicates":[100],"quality":[102],"data,":[104],"varying":[108],"qualities":[109],"show":[110],"distinctions":[112],"training.We":[116],"introduce":[117],"method":[119,148],"automatically":[121],"adjusts":[122],"modeling":[124],"based":[125],"on":[126,140],"quality,":[128],"reducing":[129],"impact":[131],"making":[135],"full":[136],"use":[137],"dataset.Experiments":[139],"datasets":[144],"demonstrate":[145],"our":[147],"stabilizes":[149],"significantly":[154],"enhances":[155],"alignment":[157],"performance":[158],"RLHF.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
