{"id":"https://openalex.org/W7159686044","doi":"https://doi.org/10.48550/arxiv.2604.27083","title":"Co-Evolving Policy Distillation","display_name":"Co-Evolving Policy Distillation","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7159686044","doi":"https://doi.org/10.48550/arxiv.2604.27083"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27083","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134975981","display_name":"Naibin Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gu, Naibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134940467","display_name":"Chenxu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Chenxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134951683","display_name":"Qingyi Si","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Si, Qingyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134971339","display_name":"Chuanyu Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Chuanyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013346368","display_name":"Dingyu Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Dingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134983737","display_name":"Peng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134941720","display_name":"Zheng Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134959578","display_name":"Weiping Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weiping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134976056","display_name":"Nan Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Nan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134967594","display_name":"Jiaqi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5134975981"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6001999974250793,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6001999974250793,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1421000063419342,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.02280000038444996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.657800018787384},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5623000264167786},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5421000123023987},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5350000262260437},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.40639999508857727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6651999950408936},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.657800018787384},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5623000264167786},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5421000123023987},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.421099990606308},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.40639999508857727},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3995000123977661},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.26499998569488525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6200354099273682,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"RLVR":[0,34,89,143],"and":[1,47,68,82,132,144,146],"OPD":[2,84,104],"have":[3],"become":[4],"standard":[5],"paradigms":[6,17],"for":[7],"post-training.":[8],"We":[9,70],"provide":[10],"a":[11,24,161],"unified":[12],"analysis":[13],"of":[14,43,80,129],"these":[15],"two":[16],"in":[18,30],"consolidating":[19],"multiple":[20],"expert":[21,95],"capabilities":[22,59],"into":[23],"single":[25],"model,":[26],"identifying":[27],"capability":[28],"loss":[29],"different":[31],"ways:":[32],"mixed":[33,142],"suffers":[35],"from":[36],"inter-capability":[37],"divergence":[38],"cost,":[39],"while":[40,116],"the":[41],"pipeline":[42],"first":[44],"training":[45,79,90,154,163],"experts":[46,81,98,115],"then":[48],"performing":[49],"OPD,":[50],"though":[51],"avoiding":[52],"divergence,":[53],"fails":[54],"to":[55,61,106],"fully":[56],"absorb":[57],"teacher":[58,67],"due":[60],"large":[62],"behavioral":[63,112],"pattern":[64,155],"gaps":[65],"between":[66],"student.":[69],"propose":[71],"Co-Evolving":[72],"Policy":[73],"Distillation":[74],"(CoPD),":[75],"which":[76],"encourages":[77],"parallel":[78,153],"introduces":[83],"during":[85],"each":[86],"expert's":[87],"ongoing":[88],"rather":[91],"than":[92],"after":[93],"complete":[94],"training,":[96],"with":[97],"serving":[99],"as":[100,141],"mutual":[101],"teachers":[102],"(making":[103],"bidirectional)":[105],"co-evolve.":[107],"This":[108],"enables":[109],"more":[110],"consistent":[111],"patterns":[113],"among":[114],"maintaining":[117],"sufficient":[118],"complementary":[119],"knowledge":[120],"throughout.":[121],"Experiments":[122],"validate":[123],"that":[124],"CoPD":[125,158],"achieves":[126],"all-in-one":[127],"integration":[128],"text,":[130],"image,":[131],"video":[133],"reasoning":[134],"capabilities,":[135],"significantly":[136],"outperforming":[137],"strong":[138],"baselines":[139],"such":[140],"MOPD,":[145],"even":[147],"surpassing":[148],"domain-specific":[149],"experts.":[150],"The":[151],"model":[152],"offered":[156],"by":[157],"may":[159],"inspire":[160],"novel":[162],"scaling":[164],"paradigm.":[165]},"counts_by_year":[],"updated_date":"2026-05-02T06:10:54.344120","created_date":"2026-05-02T00:00:00"}
