{"id":"https://openalex.org/W4416037018","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.217","title":"ReMedy: Learning Machine Translation Evaluation from Human Preferences with Reward Modeling","display_name":"ReMedy: Learning Machine Translation Evaluation from Human Preferences with Reward Modeling","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037018","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.217"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.217","pdf_url":"https://aclanthology.org/2025.emnlp-main.217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.217.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041441234","display_name":"Shaomu Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Shaomu Tan","raw_affiliation_strings":["Language Technology Lab University of Amsterdam"],"affiliations":[{"raw_affiliation_string":"Language Technology Lab University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109059955","display_name":"Christof Monz","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Christof Monz","raw_affiliation_strings":["Language Technology Lab University of Amsterdam"],"affiliations":[{"raw_affiliation_string":"Language Technology Lab University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5041441234"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17573286,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4370","last_page":"4387"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.20149999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.20149999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.11729999631643295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08150000125169754,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7354999780654907},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.608299970626831},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.569100022315979},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.5618000030517578},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5146999955177307},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5038999915122986},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5033000111579895},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.48669999837875366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7878999710083008},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7354999780654907},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6657999753952026},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.608299970626831},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5741999745368958},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.569100022315979},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.5618000030517578},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5146999955177307},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5038999915122986},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5033000111579895},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.48669999837875366},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.46889999508857727},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41600000858306885},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2985999882221222},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C135784402","wikidata":"https://www.wikidata.org/wiki/Q6958279","display_name":"Evaluation of machine translation","level":5,"score":0.25760000944137573},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.2542000114917755}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.217","pdf_url":"https://aclanthology.org/2025.emnlp-main.217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire/52c4301a-6b1d-4e0e-9f24-e6e42ffa1fab","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/remedy-learning-machine-translation-evaluation-from-human-preferences-with-reward-modeling(52c4301a-6b1d-4e0e-9f24-e6e42ffa1fab).html","pdf_url":"https://pure.uva.nl/ws/files/305314989/2025.emnlp-main.217.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Tan, S & Monz, C 2025, ReMedy: Learning Machine Translation Evaluation from Human Preferences with Reward Modeling. in C Christodoulopoulos, T Chakraborty, C Rose & V Peng (eds), The 2025 Conference on Empirical Methods in Natural Language Processing : Proceedings of the Conference : EMNLP 2025 : November 4-9, 2025. Kerrville, TX, pp. 4370-4387, 30th Conference on Empirical Methods in Natural Language Processing, EMNLP 2025, Suzhou, China, 4/11/25. https://doi.org/10.18653/v1/2025.emnlp-main.217","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.217","pdf_url":"https://aclanthology.org/2025.emnlp-main.217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1865716193","display_name":"Multilingual Language Technology for Machine Translation and Language Understanding","funder_award_id":"2023.017","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G629491556","display_name":null,"funder_award_id":"(NWO)","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G8220939175","display_name":"Multi-Parallel Neural Machine Translation","funder_award_id":"VI.C.192.080","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037018.pdf","grobid_xml":"https://content.openalex.org/works/W4416037018.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"key":[1],"challenge":[2],"in":[3,71,122],"MT":[4,42,86],"evaluation":[5,28,48],"is":[6],"the":[7],"inherent":[8],"noise":[9],"and":[10,102,112,126],"inconsistency":[11],"of":[12,54],"human":[13,58],"ratings.Regression-based":[14],"neural":[15],"metrics":[16],"struggle":[17],"with":[18],"this":[19,35],"noise,":[20],"while":[21],"prompting":[22],"LLMs":[23,105],"shows":[24],"promise":[25],"at":[26,32,92],"system-level":[27,95],"but":[29],"performs":[30],"poorly":[31],"segment":[33],"level.In":[34],"work,":[36],"we":[37],"propose":[38],"ReMedy,":[39],"a":[40,50,72],"novel":[41],"metric":[43],"framework":[44],"that":[45,117],"reformulates":[46],"translation":[47,64,124],"as":[49,107],"reward":[51],"modeling":[52],"task.Instead":[53],"regressing":[55],"on":[56],"imperfect":[57],"ratings":[59],"directly,":[60],"ReMedy":[61,88,118],"learns":[62],"relative":[63],"quality":[65],"using":[66],"pairwise":[67],"preference":[68],"data,":[69],"resulting":[70],"more":[73],"reliable":[74],"evaluation.In":[75],"extensive":[76],"experiments":[77],"across":[78],"WMT22-24":[79],"shared":[80],"tasks":[81],"(39":[82],"language":[83],"pairs,":[84],"111":[85],"systems),":[87],"achieves":[89],"stateof-the-art":[90],"performance":[91],"both":[93],"segment-and":[94],"evaluation.Specifically,":[96],"ReMedy-9B":[97],"surpasses":[98],"larger":[99],"WMT":[100],"winners":[101],"massive":[103],"closed":[104],"such":[106],"MetricX-13B,":[108],"XCOMET-Ensemble,":[109],"GEMBA-GPT-4,":[110],"PaLM-540B,":[111],"finetuned":[113],"PaLM2.Further":[114],"analyses":[115],"demonstrate":[116],"delivers":[119],"superior":[120],"capability":[121],"detecting":[123],"errors":[125],"evaluating":[127],"low-quality":[128],"translations.":[129],"1":[130]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
