{"id":"https://openalex.org/W7130416366","doi":"https://doi.org/10.48550/arxiv.2602.15260","title":"Fast and Effective On-policy Distillation from Reasoning Prefixes","display_name":"Fast and Effective On-policy Distillation from Reasoning Prefixes","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7130416366","doi":"https://doi.org/10.48550/arxiv.2602.15260"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.15260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.15260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126333333","display_name":"Dongxu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Dongxu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126306579","display_name":"Zhichao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003627973","display_name":"Sepehr Janghorbani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janghorbani, Sepehr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126304697","display_name":"Jun Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126329178","display_name":"Andrew Ressler II","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ressler, Andrew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103865471","display_name":"Qian Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Qian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126320727","display_name":"Gregory D. Lyng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyng, Gregory D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042529162","display_name":"Sanjit Singh Batra","orcid":"https://orcid.org/0000-0001-9849-865X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Batra, Sanjit Singh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125641678","display_name":"Robert E. Tillman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tillman, Robert E.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5126333333"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4325000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4325000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.17520000040531158,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.11909999698400497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.8801000118255615},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.849399983882904},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5647000074386597},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5289000272750854},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5059000253677368},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.49079999327659607}],"concepts":[{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.8801000118255615},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.849399983882904},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.717199981212616},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5647000074386597},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5059000253677368},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.49079999327659607},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.43650001287460327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3788999915122986},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.364300012588501},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3287999927997589},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C2779795794","wikidata":"https://www.wikidata.org/wiki/Q7315343","display_name":"Reset (finance)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.15260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.15260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4703870415687561,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"On-policy":[0],"distillation":[1,106,134],"(OPD),":[2],"which":[3,47],"samples":[4],"trajectories":[5],"from":[6],"the":[7,17,42,69,84,87,105,136],"student":[8,43,85],"model":[9],"and":[10,27,74,114,127],"supervises":[11],"them":[12],"with":[13],"a":[14,77,96,123],"teacher":[15],"at":[16],"token":[18],"level,":[19],"avoids":[20],"relying":[21],"solely":[22],"on":[23,122],"verifiable":[24],"terminal":[25],"rewards":[26],"can":[28,81],"yield":[29],"better":[30],"generalization":[31],"than":[32],"off-policy":[33],"distillation.":[34,120],"However,":[35],"OPD":[36,140],"requires":[37],"expensive":[38],"on-the-fly":[39],"sampling":[40,117],"of":[41,71,101,111,125,138],"policy":[44],"during":[45,61,119],"training,":[46],"substantially":[48],"increases":[49],"training":[50,63,143],"cost,":[51],"especially":[52],"for":[53],"long":[54],"responses.":[55],"Our":[56],"initial":[57],"analysis":[58],"shows":[59],"that,":[60],"OPD,":[62],"signals":[64],"are":[65],"often":[66],"concentrated":[67],"in":[68],"prefix":[70,80,133],"each":[72,116],"output,":[73],"that":[75,131],"even":[76],"short":[78],"teacher-generated":[79],"significantly":[82],"help":[83],"produce":[86],"correct":[88],"answer.":[89],"Motivated":[90],"by":[91,145],"these":[92],"observations,":[93],"we":[94,103],"propose":[95],"simple":[97],"yet":[98],"effective":[99],"modification":[100],"OPD:":[102],"apply":[104],"objective":[107],"only":[108],"to":[109],"prefixes":[110],"student-generated":[112],"outputs":[113],"terminate":[115],"early":[118],"Experiments":[121],"suite":[124],"AI-for-Math":[126],"out-of-domain":[128],"benchmarks":[129],"show":[130],"on-policy":[132],"matches":[135],"performance":[137],"full":[139],"while":[141],"reducing":[142],"FLOP":[144],"2x-47x.":[146]},"counts_by_year":[],"updated_date":"2026-02-19T06:31:58.851227","created_date":"2026-02-19T00:00:00"}
