{"id":"https://openalex.org/W4400375420","doi":"https://doi.org/10.48550/arxiv.2407.02552","title":"RLHF Can Speak Many Languages: Unlocking Multilingual Preference Optimization for LLMs","display_name":"RLHF Can Speak Many Languages: Unlocking Multilingual Preference Optimization for LLMs","publication_year":2024,"publication_date":"2024-07-02","ids":{"openalex":"https://openalex.org/W4400375420","doi":"https://doi.org/10.48550/arxiv.2407.02552"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.02552","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02552","pdf_url":"https://arxiv.org/pdf/2407.02552","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.02552","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111250631","display_name":"John Dang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dang, John","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104338915","display_name":"Arash Ahmadian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmadian, Arash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098901827","display_name":"Kelly Marchisio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marchisio, Kelly","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048307591","display_name":"Julia Kreutzer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kreutzer, Julia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063061204","display_name":"Ahmet \u00dcst\u00fcn","orcid":"https://orcid.org/0000-0002-1640-4291"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u00dcst\u00fcn, Ahmet","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5078850040","display_name":"Sara Hooker","orcid":"https://orcid.org/0000-0002-0190-6459"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hooker, Sara","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5111250631"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9384999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9297999739646912,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.7269387245178223},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4914029836654663},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.17023718357086182},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.11208036541938782}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.7269387245178223},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4914029836654663},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.17023718357086182},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.11208036541938782}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.02552","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02552","pdf_url":"https://arxiv.org/pdf/2407.02552","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.02552","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.02552","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.02552","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02552","pdf_url":"https://arxiv.org/pdf/2407.02552","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400375420.pdf","grobid_xml":"https://content.openalex.org/works/W4400375420.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Preference":[0],"optimization":[1],"techniques":[2,156],"have":[3],"become":[4],"a":[5,38,60,73,82,114,131,145],"standard":[6],"final":[7],"stage":[8],"for":[9,86],"training":[10],"state-of-art":[11],"large":[12],"language":[13],"models":[14,139],"(LLMs).":[15],"However,":[16],"despite":[17],"widespread":[18],"adoption,":[19],"the":[20,42,45,98,121,152,163],"vast":[21],"majority":[22],"of":[23,41,54,100,147,154,162],"work":[24],"to-date":[25],"has":[26],"focused":[27],"on":[28],"first-class":[29],"citizen":[30],"languages":[31,43,159],"like":[32,140],"English":[33],"and":[34,103,130],"Chinese.":[35],"This":[36],"captures":[37],"small":[39],"fraction":[40],"in":[44,76,107,126],"world,":[46],"but":[47],"also":[48],"makes":[49],"it":[50],"unclear":[51],"which":[52],"aspects":[53],"current":[55,122],"state-of-the-art":[56,75,123],"research":[57],"transfer":[58,102],"to":[59,71,92,157],"multilingual":[61,78,89,124],"setting.":[62],"In":[63],"this":[64],"work,":[65],"we":[66,150],"perform":[67],"an":[68],"exhaustive":[69],"study":[70],"achieve":[72],"new":[74],"aligning":[77],"LLMs.":[79],"We":[80,96],"introduce":[81],"novel,":[83],"scalable":[84],"method":[85],"generating":[87],"high-quality":[88],"feedback":[90],"data":[91,94],"balance":[93],"coverage.":[95],"establish":[97],"benefits":[99],"cross-lingual":[101],"increased":[104],"dataset":[105],"size":[106],"preference":[108],"training.":[109],"Our":[110],"preference-trained":[111],"model":[112],"achieves":[113],"54.4%":[115],"win-rate":[116,133],"against":[117,136],"Aya":[118],"23":[119,158],"8B,":[120],"LLM":[125],"its":[127],"parameter":[128],"class,":[129],"69.5%":[132],"or":[134],"higher":[135],"widely":[137],"used":[138],"Gemma-1.1-7B-it,":[141],"Llama-3-8B-Instruct,":[142],"Mistral-7B-Instruct-v0.3.":[143],"As":[144],"result":[146],"our":[148],"study,":[149],"expand":[151],"frontier":[153],"alignment":[155],"covering":[160],"half":[161],"world's":[164],"population.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
