{"id":"https://openalex.org/W7153212572","doi":"https://doi.org/10.48550/arxiv.2604.07837","title":"SPARD: Self-Paced Curriculum for RL Alignment via Integrating Reward Dynamics and Data Utility","display_name":"SPARD: Self-Paced Curriculum for RL Alignment via Integrating Reward Dynamics and Data Utility","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153212572","doi":"https://doi.org/10.48550/arxiv.2604.07837"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.07837","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.07837","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133328102","display_name":"Xuyang Zhi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhi, Xuyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133351438","display_name":"Peilun zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"zhou, Peilun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103860397","display_name":"Chengqiang Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Chengqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133351082","display_name":"Hang Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Hang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133318470","display_name":"Yiwei Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053555212","display_name":"Rihong Zhang","orcid":"https://orcid.org/0000-0002-2461-9233"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Rongyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133349766","display_name":"Yan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133387070","display_name":"YI WU","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"WU, YI","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133367165","display_name":"Yao Hu","orcid":"https://orcid.org/0009-0002-2166-2479"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113346577","display_name":"Hongchao Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Hongchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133358694","display_name":"Defu Lian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Defu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133316939","display_name":"Hao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133346358","display_name":"Enhong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Enhong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5133328102"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3172999918460846,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3172999918460846,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.0851999968290329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07479999959468842,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.6377000212669373},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4952999949455261},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.46399998664855957},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.44999998807907104},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.4302999973297119},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.38089999556541443},{"id":"https://openalex.org/keywords/system-dynamics","display_name":"System dynamics","score":0.3237000107765198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7524999976158142},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.6377000212669373},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4952999949455261},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4544000029563904},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44999998807907104},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4311000108718872},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.4302999973297119},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.38089999556541443},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.3231000006198883},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C2780735816","wikidata":"https://www.wikidata.org/wiki/Q28324931","display_name":"Incremental learning","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.07837","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.07837","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07837","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5290566086769104,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"evolution":[1],"of":[2,34,49],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"is":[7],"shifting":[8],"the":[9,24,30],"focus":[10],"from":[11],"single,":[12],"verifiable":[13],"tasks":[14],"toward":[15,41],"complex,":[16],"open-ended":[17],"real-world":[18],"scenarios,":[19],"imposing":[20],"significant":[21],"challenges":[22],"on":[23,60],"post-training":[25],"phase.":[26],"In":[27],"these":[28,77],"settings,":[29],"scale":[31],"and":[32,52,68,100],"complexity":[33],"reward":[35,62,98],"systems":[36],"have":[37],"grown":[38],"significantly,":[39],"transitioning":[40],"multi-objective":[42,97],"formulations":[43],"that":[44,84,119],"encompass":[45],"a":[46,82],"comprehensive":[47],"spectrum":[48],"model":[50,123],"capabilities":[51,124],"application":[53],"contexts.":[54],"However,":[55],"traditional":[56],"methods":[57],"typically":[58],"rely":[59],"fixed":[61],"weights,":[63],"ignoring":[64],"non-stationary":[65],"learning":[66,92,105],"dynamics":[67],"struggling":[69],"with":[70,107],"data":[71,101,108],"heterogeneity":[72],"across":[73,115,125],"dimensions.":[74],"To":[75],"address":[76],"issues,":[78],"we":[79],"propose":[80],"SPARD,":[81],"framework":[83],"establishes":[85],"an":[86],"automated,":[87],"self-paced":[88],"curriculum":[89],"by":[90],"perceiving":[91],"progress":[93],"to":[94],"dynamically":[95],"adjust":[96],"weights":[99],"importance,":[102],"thereby":[103],"synchronizing":[104],"intent":[106],"utility":[109],"for":[110],"optimal":[111],"performance.":[112],"Extensive":[113],"experiments":[114],"multiple":[116],"benchmarks":[117],"demonstrate":[118],"SPARD":[120],"significantly":[121],"enhances":[122],"all":[126],"domains.":[127]},"counts_by_year":[],"updated_date":"2026-04-11T06:19:08.300824","created_date":"2026-04-11T00:00:00"}
