{"id":"https://openalex.org/W4415367418","doi":"https://doi.org/10.1109/isit63088.2025.11195541","title":"Soft Best-of-$n$ Sampling for Model Alignment","display_name":"Soft Best-of-$n$ Sampling for Model Alignment","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4415367418","doi":"https://doi.org/10.1109/isit63088.2025.11195541"},"language":null,"primary_location":{"id":"doi:10.1109/isit63088.2025.11195541","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit63088.2025.11195541","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085996844","display_name":"Claudio Mayrink Verdun","orcid":"https://orcid.org/0000-0003-2079-797X"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Claudio Mayrink Verdun","raw_affiliation_strings":["Harvard University,Allston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Harvard University,Allston,MA,USA","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068245182","display_name":"Alex Oesterling","orcid":"https://orcid.org/0000-0001-8546-0089"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Oesterling","raw_affiliation_strings":["Harvard University,Allston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Harvard University,Allston,MA,USA","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015520086","display_name":"Himabindu Lakkaraju","orcid":"https://orcid.org/0000-0001-7922-6544"},"institutions":[{"id":"https://openalex.org/I4210138976","display_name":"Association for Language Learning","ror":"https://ror.org/0455ncp47","country_code":"GB","type":"other","lineage":["https://openalex.org/I4210138976"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Himabindu Lakkaraju","raw_affiliation_strings":["Harvard Business School,Allston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Harvard Business School,Allston,MA,USA","institution_ids":["https://openalex.org/I4210138976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074697940","display_name":"Fl\u00e1vio P. Calmon","orcid":"https://orcid.org/0000-0002-7493-1428"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Flavio P. Calmon","raw_affiliation_strings":["Harvard University,Allston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Harvard University,Allston,MA,USA","institution_ids":["https://openalex.org/I2801851002"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5085996844"],"corresponding_institution_ids":["https://openalex.org/I2801851002"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15417601,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.7882999777793884},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.7534999847412109},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.7297000288963318},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7035999894142151},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5626000165939331},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.5264000296592712},{"id":"https://openalex.org/keywords/sampling-distribution","display_name":"Sampling distribution","score":0.4805000126361847}],"concepts":[{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.7882999777793884},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.7534999847412109},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.7297000288963318},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7035999894142151},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.59579998254776},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5626000165939331},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4830999970436096},{"id":"https://openalex.org/C167723999","wikidata":"https://www.wikidata.org/wiki/Q3773214","display_name":"Sampling distribution","level":2,"score":0.4805000126361847},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3637999892234802},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.34929999709129333},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.30959999561309814},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.2994999885559082},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.290800005197525},{"id":"https://openalex.org/C20326153","wikidata":"https://www.wikidata.org/wiki/Q7049638","display_name":"Nonuniform sampling","level":3,"score":0.29030001163482666},{"id":"https://openalex.org/C64185310","wikidata":"https://www.wikidata.org/wiki/Q843483","display_name":"Rate\u2013distortion theory","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isit63088.2025.11195541","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit63088.2025.11195541","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1545942139","display_name":null,"funder_award_id":"CIF 2312667,FAI 2040880,CIF 2231707","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1892384964","https://openalex.org/W1977691343","https://openalex.org/W2000977215","https://openalex.org/W2002240881","https://openalex.org/W2018857587","https://openalex.org/W2020658897","https://openalex.org/W2050352677","https://openalex.org/W2069770322","https://openalex.org/W2078870785","https://openalex.org/W2080328084","https://openalex.org/W2099111195","https://openalex.org/W2108470768","https://openalex.org/W2110156410","https://openalex.org/W2160709761","https://openalex.org/W3080690820","https://openalex.org/W3133702157","https://openalex.org/W4211042066","https://openalex.org/W4390839743","https://openalex.org/W4401692366"],"related_works":[],"abstract_inverted_index":{"Best-of-<tex":[0,92],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[1,27,78,93,138],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$n$</tex>":[2,28,79,94],"(BoN)":[3],"sampling":[4,21,125],"is":[5,22,67],"a":[6,31,40,51,81,85,96,113,134],"practical":[7],"approach":[8],"for":[9,102],"aligning":[10],"language":[11],"model":[12,157],"outputs":[13],"with":[14],"human":[15],"preferences":[16],"without":[17],"expensive":[18],"fine-tuning.":[19],"BoN":[20,43,99],"performed":[23],"by":[24,56,70],"generating":[25],"<tex":[26,77,137],"responses":[29],"to":[30,128],"prompt":[32],"and":[33,62,109,142],"then":[34],"selecting":[35],"the":[36,57,60,72,106,129,143,160],"sample":[37],"that":[38,100,122,158],"maximizes":[39],"reward":[41,46,83,156],"function.":[42],"yields":[44,80],"high":[45],"values":[47],"in":[48,140],"practice":[49],"at":[50,84,133],"distortion":[52,66,87],"cost,":[53],"as":[54],"measured":[55],"KL-divergence":[58],"between":[59,105],"sampled":[61],"original":[63,107],"distribution.":[64],"This":[65],"coarsely":[68],"controlled":[69],"varying":[71],"number":[73],"of":[74,98,136,149,163],"samples:":[75],"larger":[76],"higher":[82,86],"cost.":[88],"We":[89,117],"introduce":[90],"Soft":[91,123],"sampling,":[95],"generalization":[97],"allows":[101],"smooth":[103],"interpolation":[104],"distribution":[108,111,132],"reward-maximizing":[110],"through":[112],"temperature":[114],"parameter":[115],"A.":[116],"establish":[118],"theoretical":[119],"guarantees":[120],"showing":[121],"Best-of-n":[124],"converges":[126],"sharply":[127],"optimal":[130],"tilted":[131],"rate":[135],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$O(1/n)$</tex>":[139],"KL":[141],"expected":[144],"(relative)":[145],"reward.":[146],"For":[147],"sequences":[148],"discrete":[150],"outputs,":[151],"we":[152],"analyze":[153],"an":[154],"additive":[155],"reveals":[159],"fundamental":[161],"limitations":[162],"blockwise":[164],"sampling.":[165]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-21T00:00:00"}
