{"id":"https://openalex.org/W7162677880","doi":"https://doi.org/10.48550/arxiv.2605.28751","title":"Extrapolative Weight Averaging Reveals Correctness-Efficiency Frontiers in Code RL","display_name":"Extrapolative Weight Averaging Reveals Correctness-Efficiency Frontiers in Code RL","publication_year":2026,"publication_date":"2026-05-27","ids":{"openalex":"https://openalex.org/W7162677880","doi":"https://doi.org/10.48550/arxiv.2605.28751"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.28751","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.28751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.28751","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137230198","display_name":"Kunhao Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Kunhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137275383","display_name":"Pierre Chambon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chambon, Pierre","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137211709","display_name":"Juliette Decugis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Decugis, Juliette","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137242781","display_name":"Jonas Gehring","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gehring, Jonas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137302702","display_name":"Taco Cohen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cohen, Taco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137266901","display_name":"Benjamin Negrevergne","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Negrevergne, Benjamin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137283638","display_name":"Gabriel Synnaeve","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Synnaeve, Gabriel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.12520000338554382,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.12520000338554382,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.11420000344514847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.10849999636411667,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6417999863624573},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.5972999930381775},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5199000239372253},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.4537999927997589},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4052000045776367},{"id":"https://openalex.org/keywords/continuation","display_name":"Continuation","score":0.39910000562667847}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6417999863624573},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.5972999930381775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5674999952316284},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5199000239372253},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.41190001368522644},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4052000045776367},{"id":"https://openalex.org/C88626702","wikidata":"https://www.wikidata.org/wiki/Q1128903","display_name":"Continuation","level":2,"score":0.39910000562667847},{"id":"https://openalex.org/C132459708","wikidata":"https://www.wikidata.org/wiki/Q744069","display_name":"Extrapolation","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.37610000371932983},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.32190001010894775},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C51485801","wikidata":"https://www.wikidata.org/wiki/Q16966861","display_name":"Efficient frontier","level":3,"score":0.2806999981403351},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2766000032424927},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2551000118255615},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.28751","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.28751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.28751","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.28751","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Linear":[0],"interpolation":[1],"between":[2,13,121],"fine-tuned":[3],"checkpoints":[4,27,70,125,179],"has":[5],"been":[6],"shown":[7],"to":[8,25,90],"trace":[9],"the":[10,91,97,134,138,165,170,200],"Pareto":[11],"front":[12],"competing":[14],"objectives,":[15],"but":[16,111],"whether":[17],"extrapolative":[18,142,187,222],"weight":[19,188,223],"averaging":[20,189,224],"can":[21,225],"extend":[22],"such":[23],"frontiers":[24],"new":[26],"useful":[28],"at":[29,204],"inference":[30,147],"time,":[31],"without":[32],"additional":[33],"RL":[34,43,217],"training,":[35],"remains":[36],"unclear.":[37],"We":[38],"study":[39],"this":[40,127],"question":[41],"in":[42,182,215],"for":[44],"competitive":[45],"programming,":[46],"where":[47],"hidden":[48],"unit":[49],"tests":[50,88],"under":[51,71],"time":[52],"and":[53,60,123,140,153,156,162,192,228],"memory":[54],"limits":[55],"enforce":[56],"both":[57],"functional":[58],"correctness":[59,113],"computational":[61],"efficiency.":[62],"Starting":[63],"from":[64],"a":[65,100,219],"shared":[66],"initialization,":[67],"we":[68],"train":[69],"nested":[72,212],"unit-test":[73,213],"coverage:":[74],"low-coverage":[75],"rewards":[76,83],"require":[77,84],"passing":[78,85],"smaller-input":[79],"tests,":[80],"while":[81,129],"high-coverage":[82,124],"progressively":[86],"larger":[87],"up":[89],"full":[92],"suite.":[93],"This":[94],"sweep":[95],"reveals":[96],"emergence":[98],"of":[99],"correctness-efficiency":[101],"frontier:":[102],"on":[103,195],"hard":[104],"problems,":[105],"higher-coverage":[106],"reward":[107],"reduces":[108],"optimization":[109],"failures":[110],"increases":[112],"failures,":[114],"leaving":[115],"solve":[116],"rate":[117],"nearly":[118],"unchanged.":[119],"Interpolation":[120],"low-":[122],"recovers":[126],"frontier,":[128],"extrapolation":[130],"extends":[131],"it":[132],"beyond":[133],"trained":[135],"endpoints.":[136],"Both":[137],"frontier":[139,171,220],"its":[141],"continuation":[143],"appear":[144],"across":[145,157],"three":[146],"settings,":[148],"pure":[149],"reasoning,":[150],"tool":[151],"use,":[152],"agentic":[154],"coding,":[155],"two":[158],"model":[159],"scales,":[160],"32B":[161],"7B.":[163],"At":[164],"problem":[166],"level,":[167],"moving":[168],"along":[169],"changes":[172],"which":[173],"problems":[174],"are":[175],"solved,":[176],"making":[177],"extrapolated":[178],"complementary":[180],"policies":[181],"inference-time":[183],"scaling.":[184],"Ensembles":[185],"with":[186],"broaden":[190],"coverage":[191,214],"improve":[193],"pass@250":[194],"LCB/hard":[196],"by":[197],"3.3%":[198],"over":[199],"best":[201],"single":[202],"checkpoint":[203],"matched":[205],"sample":[206],"budget.":[207],"These":[208],"results":[209],"show":[210],"that":[211,221],"code":[216],"induces":[218],"navigate,":[226],"extend,":[227],"exploit.":[229]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-29T00:00:00"}
