{"id":"https://openalex.org/W4396875110","doi":"https://doi.org/10.1109/robosoft60065.2024.10522027","title":"Continual Policy Distillation of Reinforcement Learning-based Controllers for Soft Robotic In-Hand Manipulation","display_name":"Continual Policy Distillation of Reinforcement Learning-based Controllers for Soft Robotic In-Hand Manipulation","publication_year":2024,"publication_date":"2024-04-14","ids":{"openalex":"https://openalex.org/W4396875110","doi":"https://doi.org/10.1109/robosoft60065.2024.10522027"},"language":"en","primary_location":{"id":"doi:10.1109/robosoft60065.2024.10522027","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robosoft60065.2024.10522027","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 7th International Conference on Soft Robotics (RoboSoft)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059984667","display_name":"Lanpei Li","orcid":null},"institutions":[{"id":"https://openalex.org/I122991210","display_name":"Istituto di Scienza e Tecnologie dell'Informazione \"Alessandro Faedo\"","ror":"https://ror.org/05kacka20","country_code":"IT","type":"facility","lineage":["https://openalex.org/I122991210","https://openalex.org/I4210155236"]},{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"funder","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Lanpei Li","raw_affiliation_strings":["University of Pisa,Department of Computer Science,Pisa,Italy,56127","Institute of Information Science and Technologies (ISTI), National Research Council (CNR), Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"University of Pisa,Department of Computer Science,Pisa,Italy,56127","institution_ids":["https://openalex.org/I108290504"]},{"raw_affiliation_string":"Institute of Information Science and Technologies (ISTI), National Research Council (CNR), Pisa, Italy","institution_ids":["https://openalex.org/I122991210","https://openalex.org/I4210155236"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041124796","display_name":"Enrico Donato","orcid":"https://orcid.org/0000-0002-8844-5279"},"institutions":[{"id":"https://openalex.org/I162290304","display_name":"Scuola Superiore Sant'Anna","ror":"https://ror.org/025602r80","country_code":"IT","type":"education","lineage":["https://openalex.org/I162290304"]},{"id":"https://openalex.org/I4210116474","display_name":"Center for Micro-BioRobotics","ror":"https://ror.org/02nbgxg43","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210116474"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Enrico Donato","raw_affiliation_strings":["The BioRobotics Institute, Sant&#x2019; Anna School of Advanced Studies,Pontedera (PI),Italy,56025","Departement of Excellence in Robotics & AI, Sant' Anna School of Advanced Studies, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"The BioRobotics Institute, Sant&#x2019; Anna School of Advanced Studies,Pontedera (PI),Italy,56025","institution_ids":["https://openalex.org/I4210116474"]},{"raw_affiliation_string":"Departement of Excellence in Robotics & AI, Sant' Anna School of Advanced Studies, Pisa, Italy","institution_ids":["https://openalex.org/I162290304"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023471278","display_name":"Vincenzo Lomonaco","orcid":"https://orcid.org/0000-0001-8308-6599"},"institutions":[{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Vincenzo Lomonaco","raw_affiliation_strings":["University of Pisa,Department of Computer Science,Pisa,Italy,56127"],"affiliations":[{"raw_affiliation_string":"University of Pisa,Department of Computer Science,Pisa,Italy,56127","institution_ids":["https://openalex.org/I108290504"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033570527","display_name":"Egidio Falotico","orcid":"https://orcid.org/0000-0001-8060-8080"},"institutions":[{"id":"https://openalex.org/I4210116474","display_name":"Center for Micro-BioRobotics","ror":"https://ror.org/02nbgxg43","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210116474"]},{"id":"https://openalex.org/I162290304","display_name":"Scuola Superiore Sant'Anna","ror":"https://ror.org/025602r80","country_code":"IT","type":"education","lineage":["https://openalex.org/I162290304"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Egidio Falotico","raw_affiliation_strings":["The BioRobotics Institute, Sant&#x2019; Anna School of Advanced Studies,Pontedera (PI),Italy,56025","Departement of Excellence in Robotics & AI, Sant' Anna School of Advanced Studies, Pisa, Italy"],"affiliations":[{"raw_affiliation_string":"The BioRobotics Institute, Sant&#x2019; Anna School of Advanced Studies,Pontedera (PI),Italy,56025","institution_ids":["https://openalex.org/I4210116474"]},{"raw_affiliation_string":"Departement of Excellence in Robotics & AI, Sant' Anna School of Advanced Studies, Pisa, Italy","institution_ids":["https://openalex.org/I162290304"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5059984667"],"corresponding_institution_ids":["https://openalex.org/I108290504","https://openalex.org/I122991210","https://openalex.org/I4210155236"],"apc_list":null,"apc_paid":null,"fwci":2.7392,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90131916,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1026","last_page":"1033"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9682999849319458,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6906611919403076},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.6640356183052063},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6585517525672913},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6138787269592285},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.539654016494751},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.46855220198631287},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.4390808939933777},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.42367714643478394},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.41938576102256775},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3903895914554596}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6906611919403076},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.6640356183052063},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6585517525672913},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6138787269592285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.539654016494751},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.46855220198631287},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4390808939933777},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.42367714643478394},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.41938576102256775},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3903895914554596},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/robosoft60065.2024.10522027","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robosoft60065.2024.10522027","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 7th International Conference on Soft Robotics (RoboSoft)","raw_type":"proceedings-article"},{"id":"pmh:oai:arpi.unipi.it:11568/1273867","is_oa":false,"landing_page_url":"https://ieeexplore.ieee.org/abstract/document/10522027","pdf_url":null,"source":{"id":"https://openalex.org/S4377196265","display_name":"CINECA IRIS Institutial research information system (University of Pisa)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I108290504","host_organization_name":"University of Pisa","host_organization_lineage":["https://openalex.org/I108290504"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:dnet:iris________::2ed099310fa6fba0a69b8ecc84671f5c","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference article"},{"id":"pmh:oai:www.iris.sssup.it:11382/580313","is_oa":false,"landing_page_url":"https://hdl.handle.net/11382/580313","pdf_url":null,"source":{"id":"https://openalex.org/S4377196376","display_name":"CINECA IRIS Institutional Research Information System (Sant'Anna School of Advanced Studies)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162290304","host_organization_name":"Scuola Superiore Sant'Anna","host_organization_lineage":["https://openalex.org/I162290304"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:dnet:iris________::2ed099310fa6fba0a69b8ecc84671f5c","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6100000143051147}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W201257430","https://openalex.org/W1821462560","https://openalex.org/W2325845790","https://openalex.org/W2784648116","https://openalex.org/W2901112449","https://openalex.org/W2939137134","https://openalex.org/W2943200793","https://openalex.org/W2947461406","https://openalex.org/W2950069298","https://openalex.org/W2962515681","https://openalex.org/W2996514457","https://openalex.org/W3127352841","https://openalex.org/W3138833245","https://openalex.org/W3173681440","https://openalex.org/W3216772467","https://openalex.org/W4200306000","https://openalex.org/W4210478179","https://openalex.org/W4210894397","https://openalex.org/W4224862098","https://openalex.org/W4226256030","https://openalex.org/W4226301680","https://openalex.org/W4292793890","https://openalex.org/W4376605651","https://openalex.org/W4378192004","https://openalex.org/W4387197008","https://openalex.org/W4387407795","https://openalex.org/W6763462227","https://openalex.org/W6764969207","https://openalex.org/W6804601995","https://openalex.org/W6849015028"],"related_works":["https://openalex.org/W2357124094","https://openalex.org/W2387399993","https://openalex.org/W2389739210","https://openalex.org/W2348924972","https://openalex.org/W2365736347","https://openalex.org/W2047454415","https://openalex.org/W2070040999","https://openalex.org/W2387293848","https://openalex.org/W4387399830","https://openalex.org/W1994643058"],"abstract_inverted_index":{"Dexterous":[0],"manipulation,":[1,76],"often":[2],"facilitated":[3],"by":[4],"multi-fingered":[5],"robotic":[6,15],"hands,":[7,16],"holds":[8],"solid":[9],"impact":[10],"for":[11,41,74,147],"real-world":[12],"ap-plications.":[13],"Soft":[14],"due":[17],"to":[18,50,69,77,96,102,115],"their":[19],"compliant":[20],"nature,":[21],"offer":[22],"flexibility":[23],"and":[24,29,59,83,119,141,144],"adaptability":[25,58],"during":[26],"object":[27],"grasping":[28],"manipulation.":[30],"Yet,":[31],"benefits":[32],"come":[33],"with":[34],"challenges,":[35],"particularly":[36],"in":[37,81,135],"the":[38,125],"control":[39],"development":[40],"finger":[42],"coordination.":[43],"Reinforce-ment":[44],"Learning":[45],"(RL)":[46],"can":[47],"be":[48],"employed":[49],"train":[51],"object-specific":[52],"in-hand":[53,75,148],"manipulation":[54,149],"policies,":[55],"but":[56],"limiting":[57],"generalizability.":[60],"We":[61],"introduce":[62],"a":[63,71,86,103],"Continual":[64],"Policy":[65,93],"Distillation":[66,94],"(CPD)":[67],"framework":[68,91,127],"acquire":[70],"versatile":[72,143],"controller":[73],"rotate":[78],"different":[79],"objects":[80],"shape":[82],"size":[84],"within":[85],"four-fingered":[87],"soft":[88],"gripper.":[89],"The":[90,122],"leverages":[92],"(PD)":[95],"transfer":[97],"knowledge":[98,137],"from":[99,138],"expert":[100],"policies":[101],"continually":[104],"evolving":[105],"student":[106],"policy":[107],"network.":[108],"Exemplar-based":[109],"rehearsal":[110],"methods":[111],"are":[112],"then":[113],"integrated":[114],"mitigate":[116],"catastrophic":[117],"forgetting":[118],"enhance":[120],"generalization.":[121],"performance":[123],"of":[124],"CPD":[126],"over":[128],"various":[129],"replay":[130],"strategies":[131],"demonstrates":[132],"its":[133],"effectiveness":[134],"consolidating":[136],"multiple":[139],"experts":[140],"achieving":[142],"adaptive":[145],"behaviours":[146],"tasks.":[150]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
