{"id":"https://openalex.org/W7160919996","doi":"https://doi.org/10.48550/arxiv.2605.10716","title":"What should post-training optimize? A test-time scaling law perspective","display_name":"What should post-training optimize? A test-time scaling law perspective","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7160919996","doi":"https://doi.org/10.48550/arxiv.2605.10716"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.10716","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10716","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.10716","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124891337","display_name":"Muheng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Muheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135993825","display_name":"Jian Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124887906","display_name":"Wenlong Mou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mou, Wenlong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3370000123977661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3370000123977661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.22849999368190765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.03480000048875809,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.730400025844574},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6740999817848206},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.666700005531311},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.5717999935150146},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.49729999899864197},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.4900999963283539},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.44760000705718994},{"id":"https://openalex.org/keywords/budget-constraint","display_name":"Budget constraint","score":0.43320000171661377}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.730400025844574},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6740999817848206},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.666700005531311},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6606000065803528},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.5717999935150146},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.5004000067710876},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.4900999963283539},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.43320000171661377},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.42160001397132874},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.34439998865127563},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31929999589920044},{"id":"https://openalex.org/C2988430800","wikidata":"https://www.wikidata.org/wiki/Q428971","display_name":"Scaling law","level":3,"score":0.3154999911785126},{"id":"https://openalex.org/C101454708","wikidata":"https://www.wikidata.org/wiki/Q17106019","display_name":"Standard Model (mathematical formulation)","level":3,"score":0.3116999864578247},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C63002673","wikidata":"https://www.wikidata.org/wiki/Q2260590","display_name":"Scoring rule","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.2700999975204468},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C3020318244","wikidata":"https://www.wikidata.org/wiki/Q4812187","display_name":"Large sample","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C162118730","wikidata":"https://www.wikidata.org/wiki/Q1128453","display_name":"Actuarial science","level":1,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.10716","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10716","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.10716","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10716","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4299948513507843}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,187],"models":[2,190],"are":[3,105],"increasingly":[4],"deployed":[5],"with":[6,14],"test-time":[7,92,197],"strategies:":[8],"sample":[9],"$N$":[10],"responses,":[11],"score":[12],"them":[13],"a":[15,28,39,137,148,156,164],"reward":[16,37,53,121,189],"model":[17],"or":[18],"verifier,":[19],"and":[20,163,180,191,196],"return":[21],"the":[22,35,48,52,69,110,120,126,130],"best.":[23],"This":[24,146],"deployment":[25,86],"rule":[26],"exposes":[27],"mismatch":[29],"in":[30],"post-training:":[31,155],"standard":[32],"objectives":[33,57],"optimize":[34],"mean":[36],"of":[38,51,129,150],"single":[40],"response,":[41],"whereas":[42],"best-of-$N$":[43,114,131,183],"performance":[44,184],"is":[45,77,113],"governed":[46],"by":[47,142],"upper":[49],"tail":[50],"distribution.":[54],"Recent":[55],"test-time-aware":[56],"partly":[58],"address":[59],"this":[60,96],"mismatch,":[61],"but":[62,109],"typically":[63],"assume":[64],"that":[65,125,178],"training":[66,108,195],"can":[67,87,133],"use":[68],"same":[70],"per-prompt":[71,91,103],"rollout":[72,140],"budget":[73,198],"as":[74],"deployment,":[75],"which":[76],"impractical":[78],"when":[79],"post-training":[80],"must":[81],"cover":[82],"many":[83],"prompts":[84],"while":[85],"allocate":[88],"much":[89,138],"larger":[90],"compute.":[93],"We":[94],"study":[95],"budget-mismatch":[97],"regime,":[98],"where":[99],"only":[100],"$m\\ll":[101],"N$":[102],"rollouts":[104],"available":[106],"during":[107],"target":[111],"objective":[112,132],"deployment.":[115],"Under":[116],"structural":[117],"assumptions":[118],"on":[119,170,174],"tails,":[122],"we":[123],"show":[124,177],"policy":[127],"gradient":[128],"be":[134],"approximated":[135],"from":[136],"smaller":[139],"group":[141],"extrapolating":[143],"upper-tail":[144],"statistics.":[145],"yields":[147],"family":[149],"Tail-Extrapolated":[151,160],"estimators":[152],"for":[153],"best-of-$N$-oriented":[154],"simple":[157],"direct":[158],"estimator,":[159],"Advantage":[161],"(TEA),":[162],"fixed-order":[165],"debiased":[166],"Prefix-TEA":[167,181],"estimator":[168],"based":[169],"moment":[171],"cancellation.":[172],"Experiments":[173],"instruction-following":[175],"tasks":[176],"TEA":[179],"improve":[182],"across":[185],"different":[186],"models,":[188],"datasets":[192],"under":[193],"various":[194],"settings.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
