{"id":"https://openalex.org/W7155089822","doi":"https://doi.org/10.48550/arxiv.2604.16830","title":"The Illusion of Certainty: Decoupling Capability and Calibration in On-Policy Distillation","display_name":"The Illusion of Certainty: Decoupling Capability and Calibration in On-Policy Distillation","publication_year":2026,"publication_date":"2026-04-18","ids":{"openalex":"https://openalex.org/W7155089822","doi":"https://doi.org/10.48550/arxiv.2604.16830"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16830","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16830","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134171700","display_name":"Jiaxin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiaxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134123917","display_name":"Xiangyu Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Xiangyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134135095","display_name":"Qinglin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Qinglin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022956157","display_name":"Qinyuan Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Qinyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134133640","display_name":"Caiming Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Caiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134201242","display_name":"Chien-Sheng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Chien-Sheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2646999955177307,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2646999955177307,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1573999971151352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11270000040531158,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.6173999905586243},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5593000054359436},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.5040000081062317},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.48840001225471497},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4449000060558319},{"id":"https://openalex.org/keywords/confidence-interval","display_name":"Confidence interval","score":0.43709999322891235},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.39329999685287476}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.654699981212616},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.6173999905586243},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5593000054359436},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.5040000081062317},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.48840001225471497},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46970000863075256},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.43709999322891235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4271000027656555},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.3804999887943268},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29989999532699585},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.28790000081062317},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C184047640","wikidata":"https://www.wikidata.org/wiki/Q182593","display_name":"Illusion","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C2909755999","wikidata":"https://www.wikidata.org/wiki/Q4751126","display_name":"Low Confidence","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C13662513","wikidata":"https://www.wikidata.org/wiki/Q5160087","display_name":"Confidence distribution","level":3,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16830","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16830","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16830","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"On-policy":[0],"distillation":[1,156],"(OPD)":[2],"is":[3,44,72],"an":[4,39,169],"increasingly":[5],"important":[6],"paradigm":[7],"for":[8,78],"post-training":[9],"language":[10],"models.":[11],"However,":[12],"we":[13,97],"identify":[14],"a":[15,75,90,99],"pervasive":[16],"Scaling":[17],"Law":[18],"of":[19],"Miscalibration:":[20],"while":[21,140],"OPD":[22,101],"effectively":[23],"improves":[24],"task":[25],"accuracy,":[26],"it":[27],"systematically":[28],"traps":[29],"models":[30,131],"in":[31,172],"severe":[32],"overconfidence.":[33],"We":[34,63],"trace":[35],"this":[36,65,115],"failure":[37],"to":[38],"information":[40],"mismatch:":[41],"teacher":[42],"supervision":[43],"formed":[45],"under":[46,146],"privileged":[47,84],"context":[48,85],"available":[49],"during":[50],"training,":[51],"whereas":[52],"the":[53,120,124],"deployed":[54],"model":[55,109],"must":[56],"report":[57],"confidence":[58,80,107,113,164],"using":[59],"only":[60],"deployment-time":[61,79],"information.":[62],"formalize":[64],"perspective":[66],"theoretically,":[67],"showing":[68],"that":[69,82,104,135,154,163],"teacher-conditioned":[70],"success":[71],"generally":[73],"not":[74,158],"valid":[76],"target":[77],"and":[81,89,118,132,148,162],"helpful":[83],"induces":[86],"entropy":[87],"collapse":[88],"systematic":[91],"optimism":[92],"bias.":[93],"To":[94],"address":[95],"this,":[96],"propose":[98],"calibration-aware":[100],"framework,":[102],"CaOPD,":[103],"estimates":[105],"empirical":[106],"from":[108],"rollouts,":[110],"replaces":[111],"self-reported":[112],"with":[114],"student-grounded":[116],"target,":[117],"distills":[119],"revised":[121],"response":[122],"through":[123],"same":[125],"self-distillation":[126],"pipeline.":[127],"Experiments":[128],"across":[129],"various":[130],"domains":[133],"show":[134],"CaOPD":[136],"achieves":[137],"Pareto-optimal":[138],"calibration":[139],"maintaining":[141],"competitive":[142],"capability,":[143],"generalizing":[144],"robustly":[145],"out-of-distribution":[147],"continual":[149],"learning.":[150],"Our":[151],"findings":[152],"highlight":[153],"capability":[155],"does":[157],"imply":[159],"calibrated":[160],"confidence,":[161],"should":[165],"be":[166],"treated":[167],"as":[168],"essential":[170],"objective":[171],"post-training.":[173],"Code:":[174],"https://github.com/SalesforceAIResearch/CaOPD":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
