{"id":"https://openalex.org/W7127324322","doi":"https://doi.org/10.48550/arxiv.2602.01869","title":"Skill-Pro: Learning Reusable Skills from Experience via Non-Parametric PPO for LLM Agents","display_name":"Skill-Pro: Learning Reusable Skills from Experience via Non-Parametric PPO for LLM Agents","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127324322","doi":"https://doi.org/10.48550/arxiv.2602.01869"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.01869","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001410776","display_name":"Qirui Mi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mi, Qirui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124958797","display_name":"Zhijian Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zhijian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124912534","display_name":"Mengyue Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Mengyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124963493","display_name":"Haoxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124919307","display_name":"Yisen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yisen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124886182","display_name":"HaiFeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haifeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124952782","display_name":"Jun Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5001410776"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2621999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2621999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2272000014781952,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.16429999470710754,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.6862000226974487},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6830999851226807},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5214999914169312},{"id":"https://openalex.org/keywords/reusability","display_name":"Reusability","score":0.5121999979019165},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.41830000281333923},{"id":"https://openalex.org/keywords/procedural-memory","display_name":"Procedural memory","score":0.40380001068115234},{"id":"https://openalex.org/keywords/episodic-memory","display_name":"Episodic memory","score":0.38589999079704285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.792900025844574},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.6862000226974487},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6830999851226807},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5214999914169312},{"id":"https://openalex.org/C137981799","wikidata":"https://www.wikidata.org/wiki/Q1369184","display_name":"Reusability","level":3,"score":0.5121999979019165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4449999928474426},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.41830000281333923},{"id":"https://openalex.org/C164749973","wikidata":"https://www.wikidata.org/wiki/Q18606","display_name":"Procedural memory","level":3,"score":0.40380001068115234},{"id":"https://openalex.org/C88576662","wikidata":"https://www.wikidata.org/wiki/Q18646","display_name":"Episodic memory","level":3,"score":0.38589999079704285},{"id":"https://openalex.org/C124469403","wikidata":"https://www.wikidata.org/wiki/Q1813993","display_name":"Procedural knowledge","level":3,"score":0.3847000002861023},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37689998745918274},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3684999942779541},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.35920000076293945},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3528999984264374},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2953999936580658},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2791000008583069},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2500999867916107}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.01869","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.01869","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01869","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.01869","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.45488855242729187,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM-driven":[0],"agents":[1,38],"excel":[2],"at":[3],"sequential":[4],"decision-making":[5],"but":[6],"often":[7],"rely":[8],"on":[9],"on-the-fly":[10],"reasoning,":[11],"re-deriving":[12],"solutions":[13],"even":[14],"in":[15],"recurring":[16],"scenarios.":[17],"This":[18],"insufficient":[19],"experience":[20],"reuse":[21,122],"leads":[22],"to":[23,39,70,148],"computational":[24],"redundancy":[25],"and":[26,67,92,114,124,134,144],"instability.":[27],"To":[28,73],"bridge":[29],"this":[30],"gap,":[31],"we":[32,80],"propose":[33],"Skill-Pro,":[34],"a":[35,53,93],"framework":[36],"enabling":[37],"autonomously":[40],"learn":[41],"reusable":[42],"procedural":[43,107,146],"skills":[44],"from":[45],"interaction":[46],"experiences":[47],"without":[48,77],"parameter":[49],"updates.":[50],"By":[51],"formalizing":[52],"Skill-MDP,":[54],"Skill-Pro":[55,103,119,140],"transforms":[56],"passive":[57],"episodic":[58],"narratives":[59],"into":[60],"executable":[61],"Skills":[62],"defined":[63],"by":[64],"activation,":[65],"execution,":[66],"termination":[68],"conditions":[69],"ensure":[71],"executability.":[72],"achieve":[74],"reliable":[75],"reusability":[76],"capability":[78],"degradation,":[79],"introduce":[81],"Non-Parametric":[82],"PPO,":[83],"which":[84],"leverages":[85],"semantic":[86],"gradients":[87],"for":[88,96],"high-quality":[89,106],"candidate":[90],"generation":[91],"PPO":[94],"Gate":[95],"robust":[97],"Skill":[98,135],"verification.":[99],"Through":[100],"score-based":[101],"maintenance,":[102],"sustains":[104],"compact,":[105],"memory.":[108],"Experimental":[109],"results":[110],"across":[111],"in-domain,":[112],"cross-task,":[113],"cross-agent":[115],"scenarios":[116],"demonstrate":[117],"that":[118],"achieves":[120],"superior":[121],"rates":[123],"significant":[125],"gains":[126],"with":[127],"extreme":[128],"memory":[129],"compression.":[130],"Visualized":[131],"evolutionary":[132],"trajectories":[133],"distributions":[136],"further":[137],"reveal":[138],"how":[139],"transparently":[141],"accumulates,":[142],"refines,":[143],"reuses":[145],"knowledge":[147],"facilitate":[149],"long-term":[150],"autonomy.":[151]},"counts_by_year":[],"updated_date":"2026-05-30T06:14:24.967023","created_date":"2026-02-04T00:00:00"}
