{"id":"https://openalex.org/W7161010155","doi":"https://doi.org/10.48550/arxiv.2605.11260","title":"Curriculum Learning-Guided Progressive Distillation in Large Language Models","display_name":"Curriculum Learning-Guided Progressive Distillation in Large Language Models","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7161010155","doi":"https://doi.org/10.48550/arxiv.2605.11260"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.11260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.11260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124914403","display_name":"Jincheng Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Jincheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136057465","display_name":"Fanzhi Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Fanzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136019459","display_name":"Leqi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Leqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008417632","display_name":"Aryan Mokhtari","orcid":"https://orcid.org/0000-0001-6603-0091"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mokhtari, Aryan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.23499999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.23499999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09009999781847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.08919999748468399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.7009000182151794},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6614000201225281},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6062999963760376},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4740999937057495},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.31859999895095825}],"concepts":[{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.7009000182151794},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6661999821662903},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6614000201225281},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6062999963760376},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4754999876022339},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4740999937057495},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39629998803138733},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2946999967098236},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C154030694","wikidata":"https://www.wikidata.org/wiki/Q1436074","display_name":"Fractionating column","level":3,"score":0.272599995136261},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.26499998569488525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.11260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.11260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8305158019065857}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Knowledge":[0],"distillation":[1,22,47,128],"is":[2,120],"a":[3,73],"key":[4],"technique":[5],"for":[6,79],"transferring":[7],"the":[8,29,36,52,136,159],"capabilities":[9],"of":[10,32,115,161],"large":[11],"language":[12,175],"models":[13],"(LLMs)":[14],"into":[15,126,173],"smaller,":[16],"more":[17],"efficient":[18],"student":[19,42],"models.":[20,43,176],"Existing":[21],"approaches":[23],"often":[24],"overlook":[25],"two":[26],"critical":[27],"factors:":[28],"learning":[30],"order":[31],"training":[33,96],"data":[34,84,146,164],"and":[35,41,122,149,166],"capacity":[37,168],"mismatch":[38],"between":[39],"teacher":[40,87,150,167],"This":[44],"oversight":[45],"limits":[46],"performance,":[48],"as":[49],"manifested":[50],"by":[51,82,94,111],"counter-intuitive":[53],"phenomenon":[54],"where":[55],"stronger":[56],"teachers":[57,114],"fail":[58],"to":[59,100],"produce":[60],"better":[61],"students.":[62],"In":[63],"this":[64],"work,":[65],"we":[66],"propose":[67],"Curriculum":[68],"Learning-Guided":[69],"Progressive":[70],"Distillation":[71],"(CLPD),":[72],"unified":[74],"framework":[75,119],"that":[76,140],"explicitly":[77],"accounts":[78],"both":[80],"factors":[81],"aligning":[83],"difficulty":[85],"with":[86,130],"strength.":[88],"CLPD":[89,141],"constructs":[90],"an":[91,105],"explicit":[92],"curriculum":[93,107],"organizing":[95],"examples":[97],"from":[98],"easy":[99],"hard,":[101],"while":[102],"simultaneously":[103],"applying":[104],"implicit":[106],"over":[108],"supervision":[109],"signals":[110],"progressively":[112],"scheduling":[113,151],"increasing":[116],"capacity.":[117],"Our":[118],"modular":[121],"can":[123],"be":[124],"integrated":[125],"standard":[127,144],"algorithms":[129],"minimal":[131],"overhead.":[132],"Empirical":[133],"results":[134],"on":[135],"reasoning":[137,171],"benchmarks":[138],"demonstrate":[139],"consistently":[142],"outperforms":[143],"distillation,":[145],"ordering":[147,165],"alone,":[148],"alone":[152],"across":[153],"multiple":[154],"settings.":[155],"These":[156],"findings":[157],"highlight":[158],"importance":[160],"jointly":[162],"considering":[163],"when":[169],"distilling":[170],"abilities":[172],"small":[174]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-14T00:00:00"}
