{"id":"https://openalex.org/W2535997952","doi":"https://doi.org/10.1109/devlrn.2012.6400871","title":"Reinforcement learning algorithms that assimilate and accommodate skills with multiple tasks","display_name":"Reinforcement learning algorithms that assimilate and accommodate skills with multiple tasks","publication_year":2012,"publication_date":"2012-11-01","ids":{"openalex":"https://openalex.org/W2535997952","doi":"https://doi.org/10.1109/devlrn.2012.6400871","mag":"2535997952"},"language":"en","primary_location":{"id":"doi:10.1109/devlrn.2012.6400871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/devlrn.2012.6400871","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://puma.isti.cnr.it/dfdownloadnew.php?ident=cnr.istc/cnr.istc/2012-A2-038","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031335878","display_name":"Paolo Tommasino","orcid":"https://orcid.org/0000-0003-3602-1594"},"institutions":[{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"funder","lineage":["https://openalex.org/I4210155236"]},{"id":"https://openalex.org/I4210166865","display_name":"Institute of Cognitive Sciences and Technologies","ror":"https://ror.org/05w9g2j85","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210155236","https://openalex.org/I4210166865"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Paolo Tommasino","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy","institution_ids":["https://openalex.org/I4210166865","https://openalex.org/I4210155236"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060335674","display_name":"Daniele Caligiore","orcid":"https://orcid.org/0000-0002-3040-9961"},"institutions":[{"id":"https://openalex.org/I4210166865","display_name":"Institute of Cognitive Sciences and Technologies","ror":"https://ror.org/05w9g2j85","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210155236","https://openalex.org/I4210166865"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"funder","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Daniele Caligiore","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy","institution_ids":["https://openalex.org/I4210166865","https://openalex.org/I4210155236"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009773159","display_name":"Marco Mirolli","orcid":"https://orcid.org/0000-0002-9963-6871"},"institutions":[{"id":"https://openalex.org/I4210166865","display_name":"Institute of Cognitive Sciences and Technologies","ror":"https://ror.org/05w9g2j85","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210155236","https://openalex.org/I4210166865"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"funder","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Mirolli","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy","institution_ids":["https://openalex.org/I4210166865","https://openalex.org/I4210155236"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076711458","display_name":"Gianluca Baldassarre","orcid":"https://orcid.org/0000-0002-1277-4447"},"institutions":[{"id":"https://openalex.org/I4210166865","display_name":"Institute of Cognitive Sciences and Technologies","ror":"https://ror.org/05w9g2j85","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210155236","https://openalex.org/I4210166865"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"funder","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Gianluca Baldassarre","raw_affiliation_strings":["Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale delle Ricerche (LOCEN-ISTC-CNR), Istituto di Scienze e Tecnologie della Cognizione, Roma, Italy","institution_ids":["https://openalex.org/I4210166865","https://openalex.org/I4210155236"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5031335878"],"corresponding_institution_ids":["https://openalex.org/I4210155236","https://openalex.org/I4210166865"],"apc_list":null,"apc_paid":null,"fwci":0.4299,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.63935334,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"10","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10982","display_name":"Motor Control and Adaptation","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10982","display_name":"Motor Control and Adaptation","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8442867994308472},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7419335842132568},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5766569972038269},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5369780659675598},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4923347234725952},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4252275824546814}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8442867994308472},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7419335842132568},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5766569972038269},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5369780659675598},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4923347234725952},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4252275824546814},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/devlrn.2012.6400871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/devlrn.2012.6400871","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.713.8136","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.713.8136","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://laral.istc.cnr.it/mirolli/papers/TommasinoCaligioreMirolliBaldassarre2012ICDL.pdf","raw_type":"text"},{"id":"pmh:oai:pumaoai.isti.cnr.it:cnr.istc/cnr.istc/2012-A2-038","is_oa":true,"landing_page_url":"http://puma.isti.cnr.it/dfdownloadnew.php?ident=cnr.istc/cnr.istc/2012-A2-038","pdf_url":null,"source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"In: ICDL-EpiRob 2012 - 2012 International Conference on Development and Learning and Epigenetic Robotics (San Diego, CA, 7-9 November 2012). Proceedings, pp. 1 - 8. IEEE, 2012.","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:pumaoai.isti.cnr.it:cnr.istc/cnr.istc/2012-A2-038","is_oa":true,"landing_page_url":"http://puma.isti.cnr.it/dfdownloadnew.php?ident=cnr.istc/cnr.istc/2012-A2-038","pdf_url":null,"source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"In: ICDL-EpiRob 2012 - 2012 International Conference on Development and Learning and Epigenetic Robotics (San Diego, CA, 7-9 November 2012). Proceedings, pp. 1 - 8. IEEE, 2012.","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5299999713897705,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W144463461","https://openalex.org/W1504212531","https://openalex.org/W1554233645","https://openalex.org/W1592847719","https://openalex.org/W1607052939","https://openalex.org/W1682403713","https://openalex.org/W1974951252","https://openalex.org/W1988360637","https://openalex.org/W2005778100","https://openalex.org/W2012036715","https://openalex.org/W2092726287","https://openalex.org/W2097381042","https://openalex.org/W2097815751","https://openalex.org/W2097861969","https://openalex.org/W2098086734","https://openalex.org/W2099860458","https://openalex.org/W2108535023","https://openalex.org/W2113122939","https://openalex.org/W2113501460","https://openalex.org/W2121863487","https://openalex.org/W2150884987","https://openalex.org/W2157904933","https://openalex.org/W2162892219","https://openalex.org/W2167518172","https://openalex.org/W2172968643","https://openalex.org/W2185015080","https://openalex.org/W4214717370","https://openalex.org/W4249441547","https://openalex.org/W6674600207","https://openalex.org/W6676557315","https://openalex.org/W6676748545","https://openalex.org/W6686594932"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347","https://openalex.org/W4210805261"],"abstract_inverted_index":{"Children":[0],"are":[1,204],"capable":[2,205],"of":[3,8,12,51,85,97,102,109,140,148,206,249],"acquiring":[4],"a":[5,20,25,49,70,83,115,177,192],"large":[6],"repertoire":[7],"motor":[9,36],"skills":[10,37],"and":[11,41,79,104,152,164,172,187,209,220,245],"efficiently":[13],"adapting":[14],"them":[15],"to":[16,60,82,106,133,158,214,225,235,242],"novel":[17],"conditions.":[18],"In":[19,65],"previous":[21,135],"work":[22,61],"we":[23],"proposed":[24],"hierarchical":[26],"modular":[27],"reinforcement":[28,63,195],"learning":[29,81,108,151,196],"model":[30,45,54,68,90,117,136,232,240],"(RANK)":[31],"that":[32,55,124,143,201],"can":[33],"learn":[34],"multiple":[35,110,184],"in":[38,182,212],"continuous":[39],"action":[40],"state":[42],"spaces.":[43],"The":[44,89,128,198],"is":[46,137,233],"based":[47],"on":[48],"development":[50],"the":[52,67,95,99,134,138,141,145,170,173,189,222,230,238],"mixture-of-expert":[53],"has":[56],"been":[57],"suitably":[58],"developed":[59,93],"with":[62,94,131,176,191],"learning.":[64,166],"particular,":[66],"uses":[69],"high-level":[71],"gating":[72],"network":[73],"for":[74,77,80,150,153,162,165],"assigning":[75],"responsibilities":[76],"acting":[78],"set":[84],"low-level":[86],"expert":[87],"networks.":[88],"was":[91],"also":[92],"goal":[96],"exploiting":[98,207],"Piagetian":[100],"mechanisms":[101,142],"assimilation":[103,208],"accommodation":[105,210],"support":[107],"tasks.":[111],"This":[112,155],"paper":[113],"proposes":[114],"new":[116],"(TERL":[118],"-":[119],"Transfer":[120],"Expert":[121],"Reinforcement":[122],"Learning)":[123],"substantially":[125],"improves":[126],"RANK.":[127],"key":[129],"difference":[130],"respect":[132],"decoupling":[139],"generate":[144],"responsibility":[146],"signals":[147],"experts":[149],"control.":[154],"made":[156],"possible":[157],"satisfy":[159],"different":[160],"constraints":[161],"functioning":[163],"We":[167],"test":[168],"both":[169,202],"TERL":[171,231],"RANK":[174,239],"models":[175,203],"two-DOFs":[178],"dynamic":[179],"arm":[180],"engaged":[181],"solving":[183],"reaching":[185],"tasks,":[186,219],"compare":[188],"two":[190],"simple,":[193],"flat":[194],"model.":[197],"results":[199],"show":[200],"processes":[211],"order":[213],"transfer":[215],"knowledge":[216],"between":[217],"similar":[218],"at":[221],"same":[223],"time":[224],"avoid":[226],"catastrophic":[227],"interference.":[228],"Furthermore,":[229],"shown":[234],"significantly":[236],"outperform":[237],"thanks":[241],"its":[243],"faster":[244],"more":[246],"stable":[247],"specialization":[248],"experts.":[250]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
