{"id":"https://openalex.org/W7131657503","doi":"https://doi.org/10.48550/arxiv.2602.21585","title":"Duel-Evolve: Reward-Free Test-Time Scaling via LLM Self-Preferences","display_name":"Duel-Evolve: Reward-Free Test-Time Scaling via LLM Self-Preferences","publication_year":2026,"publication_date":"2026-02-25","ids":{"openalex":"https://openalex.org/W7131657503","doi":"https://doi.org/10.48550/arxiv.2602.21585"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.21585","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083128256","display_name":"Sweta Karlekar","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karlekar, Sweta","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126947202","display_name":"Carolina Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Carolina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115997593","display_name":"Magnus Saebo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saebo, Magnus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Beltran-Velez, Nicolas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beltran-Velez, Nicolas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126932368","display_name":"Shuyang Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Shuyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037074790","display_name":"John Bowlan","orcid":"https://orcid.org/0000-0002-2051-1778"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bowlan, John","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058477567","display_name":"Michal Kucer","orcid":"https://orcid.org/0000-0002-1438-4442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kucer, Michal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103551371","display_name":"David Blei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blei, David","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5083128256"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.12610000371932983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.12610000371932983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.07429999858140945,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.04349999874830246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7146999835968018},{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.4447000026702881},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.43459999561309814},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4212999939918518},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.3953999876976013},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.34630000591278076},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.33000001311302185}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7146999835968018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5116000175476074},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.503000020980835},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.4447000026702881},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4417000114917755},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.43459999561309814},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4212999939918518},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.367000013589859},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.33000001311302185},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.3206999897956848},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2874000072479248},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2732999920845032},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C141934464","wikidata":"https://www.wikidata.org/wiki/Q3305386","display_name":"Local optimum","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C34559072","wikidata":"https://www.wikidata.org/wiki/Q2334061","display_name":"Design of experiments","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.21585","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.21585","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.21585","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.21585","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Many":[0],"applications":[1],"seek":[2],"to":[3,33,55,100,143],"optimize":[4],"LLM":[5,70,98],"outputs":[6],"at":[7],"test":[8],"time":[9],"by":[10,50,175],"iteratively":[11],"proposing,":[12],"scoring,":[13],"and":[14,64,163,165,192],"refining":[15],"candidates":[16],"over":[17,160,171,176,209],"a":[18,25,110],"discrete":[19,211],"output":[20,212],"space.":[21],"Existing":[22],"methods":[23,162,174],"use":[24],"calibrated":[26],"scalar":[27,89],"evaluator":[28],"for":[29,37,206],"the":[30,69,96,126,181],"target":[31],"objective":[32],"guide":[34,123],"search,":[35,191],"but":[36],"many":[38],"tasks":[39],"such":[40],"scores":[41],"are":[42,52],"unavailable,":[43],"too":[44],"sparse,":[45],"or":[46],"unreliable.":[47],"Pairwise":[48],"comparisons,":[49],"contrast,":[51],"often":[53],"easier":[54],"elicit,":[56],"still":[57],"provide":[58,202],"useful":[59],"signal":[60,205],"on":[61,76,150,166],"improvement":[62,208],"directions,":[63],"can":[65],"be":[66],"obtained":[67],"from":[68,95],"itself":[71],"without":[72],"external":[73,88],"supervision.":[74],"Building":[75],"this":[77],"observation,":[78],"we":[79],"introduce":[80],"Duel-Evolve,":[81],"an":[82],"evolutionary":[83],"optimization":[84,204],"algorithm":[85],"that":[86,199],"replaces":[87],"rewards":[90],"with":[91],"pairwise":[92,200],"preferences":[93],"elicited":[94],"same":[97],"used":[99],"generate":[101,144],"candidates.":[102,146],"Duel-Evolve":[103,149],"aggregates":[104],"these":[105],"noisy":[106],"candidate":[107,118],"comparisons":[108],"via":[109],"Bayesian":[111],"Bradley-Terry":[112],"model,":[113,186],"yielding":[114],"uncertainty-aware":[115],"estimates":[116,122],"of":[117,125,140],"quality.":[119],"These":[120],"quality":[121],"allocation":[124],"comparison":[127],"budget":[128],"toward":[129],"plausible":[130],"optima":[131],"using":[132],"Double":[133],"Thompson":[134],"Sampling,":[135],"as":[136,138],"well":[137],"selection":[139],"high-quality":[141],"parents":[142],"improved":[145],"We":[147],"evaluate":[148],"MathBench,":[151],"where":[152,168],"it":[153,169],"achieves":[154],"20":[155],"percentage":[156,178],"points":[157],"higher":[158],"accuracy":[159],"existing":[161],"baselines,":[164],"LiveCodeBench,":[167],"improves":[170],"comparable":[172],"iterative":[173],"12":[177],"points.":[179],"Notably,":[180],"method":[182],"requires":[183],"no":[184,187,193],"reward":[185],"ground-truth":[188],"labels":[189],"during":[190],"hand-crafted":[194],"scoring":[195],"function.":[196],"Results":[197],"show":[198],"self-preferences":[201],"strong":[203],"test-time":[207],"large,":[210],"spaces.":[213]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-27T00:00:00"}
