{"id":"https://openalex.org/W7160592713","doi":"https://doi.org/10.48550/arxiv.2605.06614","title":"SkillOS: Learning Skill Curation for Self-Evolving Agents","display_name":"SkillOS: Learning Skill Curation for Self-Evolving Agents","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160592713","doi":"https://doi.org/10.48550/arxiv.2605.06614"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06614","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002970439","display_name":"Siru Ouyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouyang, Siru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135676471","display_name":"Jun Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135695486","display_name":"Yanfei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yanfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135670061","display_name":"Rujun Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Rujun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135709673","display_name":"Zifeng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zifeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087135401","display_name":"Bhavana Dalvi Mishra","orcid":"https://orcid.org/0000-0002-3813-8641"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Bhavana Dalvi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135678547","display_name":"Rui Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135703743","display_name":"Chun-Liang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chun-Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064060066","display_name":"Yizhu Jiao","orcid":"https://orcid.org/0000-0003-0509-8652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Yizhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135712499","display_name":"Kaiwen Zha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zha, Kaiwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068080887","display_name":"Maohao Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Maohao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135719160","display_name":"Vishy Tirumalashetty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tirumalashetty, Vishy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069155531","display_name":"George Lee","orcid":"https://orcid.org/0000-0002-8705-9210"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, George","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135648752","display_name":"Jiawei Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Jiawei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135690022","display_name":"Tomas Pfister","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pfister, Tomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135719007","display_name":"Chen-Yu Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Chen-Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2696000039577484,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2696000039577484,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07919999957084656,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.06729999929666519,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.8618000149726868},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7792999744415283},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5712000131607056},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4490000009536743},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4350999891757965},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.3865000009536743},{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.376800000667572},{"id":"https://openalex.org/keywords/dreyfus-model-of-skill-acquisition","display_name":"Dreyfus model of skill acquisition","score":0.3488999903202057}],"concepts":[{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.8618000149726868},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7792999744415283},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6341000199317932},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5712000131607056},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4490000009536743},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.385699987411499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3799999952316284},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.376800000667572},{"id":"https://openalex.org/C132758656","wikidata":"https://www.wikidata.org/wiki/Q5307365","display_name":"Dreyfus model of skill acquisition","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3237000107765198},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C2986563244","wikidata":"https://www.wikidata.org/wiki/Q6822310","display_name":"Learning to learn","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.30489999055862427},{"id":"https://openalex.org/C2779786715","wikidata":"https://www.wikidata.org/wiki/Q1393166","display_name":"Discovery learning","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2556000053882599},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.2526000142097473},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7735957503318787,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM-based":[0],"agents":[1],"are":[2],"increasingly":[3],"deployed":[4],"to":[5,18,65],"handle":[6],"streaming":[7],"tasks,":[8,160],"yet":[9],"they":[10,62],"often":[11],"remain":[12],"one-off":[13],"problem":[14],"solvers":[15],"that":[16,101,111,190,211],"fail":[17],"learn":[19,66],"from":[20,26,71,116],"past":[21],"interactions.":[22],"Reusable":[23],"skills":[24,105,201],"distilled":[25],"experience":[27],"provide":[28,120],"a":[29,97,107],"natural":[30],"substrate":[31],"for":[32,57,88,123],"self-evolution,":[33],"where":[34,140],"high-quality":[35],"skill":[36,49,53,59,90,109,177,197],"curation":[37,69,91],"serves":[38],"as":[39],"the":[40,144,175,191,200],"key":[41],"bottleneck.":[42],"Existing":[43],"approaches":[44],"either":[45],"rely":[46],"on":[47,131,136],"manual":[48],"curation,":[50,124],"prescribe":[51],"heuristic":[52],"operations,":[54],"or":[55],"train":[56,130],"short-horizon":[58],"operations.":[60],"However,":[61],"still":[63],"struggle":[64],"complex":[67],"long-term":[68],"policies":[70],"indirect":[72],"and":[73,103,129,146,157,165,172,184],"delayed":[74],"feedback.":[75],"To":[76,119],"tackle":[77],"this":[78],"challenge,":[79],"we":[80,125],"propose":[81],"SkillOS,":[82],"an":[83,113],"experience-driven":[84],"RL":[85],"training":[86],"recipe":[87],"learning":[89,121],"in":[92,169,202],"self-evolving":[93],"agents.":[94],"SkillOS":[95,161],"pairs":[96],"frozen":[98],"agent":[99],"executor":[100,182],"retrieves":[102],"applies":[104],"with":[106,174],"trainable":[108],"curator":[110,178,193],"updates":[112],"external":[114],"SkillRepo":[115,203],"accumulated":[117],"experience.":[118],"signals":[122],"design":[126],"composite":[127],"rewards":[128],"grouped":[132],"task":[133,138,185],"streams":[134],"based":[135],"skill-relevant":[137],"dependencies,":[139],"earlier":[141],"trajectories":[142],"update":[143],"SkillRepo,":[145],"later":[147],"related":[148],"tasks":[149,156],"evaluate":[150],"these":[151],"updates.":[152],"Across":[153],"multi-turn":[154],"agentic":[155],"single-turn":[158],"reasoning":[159],"consistently":[162],"outperforms":[163],"memory-free":[164],"strong":[166],"memory-based":[167],"baselines":[168],"both":[170],"effectiveness":[171],"efficiency,":[173],"learned":[176,192],"generalizing":[179],"across":[180],"different":[181],"backbones":[183],"domains.":[186],"Further":[187],"analyses":[188],"show":[189],"produces":[194],"more":[195,206],"targeted":[196],"use,":[198],"while":[199],"evolve":[204],"into":[205],"richly":[207],"structured":[208],"Markdown":[209],"files":[210],"encode":[212],"higher-level":[213],"meta-skills":[214],"over":[215],"time.":[216]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-09T00:00:00"}
