{"id":"https://openalex.org/W4406072085","doi":"https://doi.org/10.48550/arxiv.2407.14622","title":"BOND: Aligning LLMs with Best-of-N Distillation","display_name":"BOND: Aligning LLMs with Best-of-N Distillation","publication_year":2024,"publication_date":"2024-07-19","ids":{"openalex":"https://openalex.org/W4406072085","doi":"https://doi.org/10.48550/arxiv.2407.14622"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.14622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.14622","pdf_url":"https://arxiv.org/pdf/2407.14622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.14622","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035760608","display_name":"Pier Giuseppe Sessa","orcid":"https://orcid.org/0000-0001-8986-8815"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sessa, Pier Giuseppe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043577372","display_name":"Robert Dadashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dadashi, Robert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079135104","display_name":"L\u00e9onard Hussenot","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hussenot, L\u00e9onard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087706654","display_name":"Johan Ferret","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferret, Johan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071735499","display_name":"Nino Vieillard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vieillard, Nino","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057217987","display_name":"Alexandre Ram\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ram\u00e9, Alexandre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115771709","display_name":"Bobak Shariari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shariari, Bobak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031705919","display_name":"Sarah Perrin","orcid":"https://orcid.org/0000-0002-4488-6689"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perrin, Sarah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039658137","display_name":"Abe Friesen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Friesen, Abe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028486593","display_name":"Geoffrey Cideron","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cideron, Geoffrey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103445258","display_name":"Sertan Girgin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Girgin, Sertan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013012032","display_name":"Piotr Sta\u0144czyk","orcid":"https://orcid.org/0000-0003-0124-2936"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stanczyk, Piotr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046831695","display_name":"Andrea Michi","orcid":"https://orcid.org/0009-0001-4797-3593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michi, Andrea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025950368","display_name":"Danila Sinopalnikov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sinopalnikov, Danila","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035211063","display_name":"Sabela Ramos","orcid":"https://orcid.org/0000-0001-6656-9732"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramos, Sabela","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023095596","display_name":"Am\u00e9lie H\u00e9liou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H\u00e9liou, Am\u00e9lie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080125160","display_name":"Aliaksei Severyn","orcid":"https://orcid.org/0009-0003-2954-4167"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Severyn, Aliaksei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108813586","display_name":"Matt Hoffman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoffman, Matt","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049028265","display_name":"Nikola Momchev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Momchev, Nikola","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089890773","display_name":"Olivier Bachem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bachem, Olivier","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":20,"corresponding_author_ids":["https://openalex.org/A5035760608"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.7631000280380249,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.7631000280380249,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bond","display_name":"Bond","score":0.6489981412887573},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5593911409378052},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.3852052688598633},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.3450556993484497},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.2765577435493469},{"id":"https://openalex.org/keywords/organic-chemistry","display_name":"Organic chemistry","score":0.16317424178123474},{"id":"https://openalex.org/keywords/finance","display_name":"Finance","score":0.133754163980484}],"concepts":[{"id":"https://openalex.org/C69738904","wikidata":"https://www.wikidata.org/wiki/Q11693","display_name":"Bond","level":2,"score":0.6489981412887573},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5593911409378052},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.3852052688598633},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.3450556993484497},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.2765577435493469},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.16317424178123474},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.133754163980484}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.14622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.14622","pdf_url":"https://arxiv.org/pdf/2407.14622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2407.14622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.14622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.14622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.14622","pdf_url":"https://arxiv.org/pdf/2407.14622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W3026162553","https://openalex.org/W2344382886","https://openalex.org/W19111321","https://openalex.org/W2412887479","https://openalex.org/W32245304","https://openalex.org/W1502198272","https://openalex.org/W2017540542","https://openalex.org/W2953684491","https://openalex.org/W4285338581"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"from":[2,77],"human":[3],"feedback":[4],"(RLHF)":[5],"is":[6,27,66],"a":[7,20,46,67,114],"key":[8],"driver":[9],"of":[10,75,95,123],"quality":[11],"and":[12,23,97,104,107,126,135],"safety":[13],"in":[14],"state-of-the-art":[15],"large":[16],"language":[17],"models.":[18,137],"Yet,":[19],"surprisingly":[21],"simple":[22],"strong":[24],"inference-time":[25],"strategy":[26],"Best-of-N":[28,43,54,85],"sampling":[29],"that":[30,50,71,112],"selects":[31],"the":[32,73,78,84,89,121],"best":[33],"generation":[34],"among":[35],"N":[36],"candidates.":[37],"In":[38],"this":[39],"paper,":[40],"we":[41],"propose":[42],"Distillation":[44],"(BOND),":[45],"novel":[47],"RLHF":[48,145],"algorithm":[49,70],"seeks":[51],"to":[52,80,83,100],"emulate":[53],"but":[55],"without":[56],"its":[57],"significant":[58],"computational":[59],"overhead":[60],"at":[61],"inference":[62],"time.":[63],"Specifically,":[64],"BOND":[65,142],"distribution":[68,74],"matching":[69],"forces":[72],"generations":[76],"policy":[79],"get":[81],"closer":[82],"distribution.":[86],"We":[87,119],"use":[88],"Jeffreys":[90],"divergence":[91],"(a":[92],"linear":[93],"combination":[94],"forward":[96],"backward":[98],"KL)":[99],"balance":[101],"between":[102],"mode-covering":[103],"mode-seeking":[105],"behavior,":[106],"derive":[108],"an":[109],"iterative":[110],"formulation":[111],"utilizes":[113],"moving":[115],"anchor":[116],"for":[117],"efficiency.":[118],"demonstrate":[120],"effectiveness":[122],"our":[124],"approach":[125],"several":[127,151],"design":[128],"choices":[129],"through":[130],"experiments":[131],"on":[132,150],"abstractive":[133],"summarization":[134],"Gemma":[136,139],"Aligning":[138],"policies":[140],"with":[141],"outperforms":[143],"other":[144],"algorithms":[146],"by":[147],"improving":[148],"results":[149],"benchmarks.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
