{"id":"https://openalex.org/W2634239194","doi":"https://doi.org/10.1109/tcds.2017.2718938","title":"Combining Model-Based $Q$ -Learning With Structural Knowledge Transfer for Robot Skill Learning","display_name":"Combining Model-Based $Q$ -Learning With Structural Knowledge Transfer for Robot Skill Learning","publication_year":2017,"publication_date":"2017-06-22","ids":{"openalex":"https://openalex.org/W2634239194","doi":"https://doi.org/10.1109/tcds.2017.2718938","mag":"2634239194"},"language":"en","primary_location":{"id":"doi:10.1109/tcds.2017.2718938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2017.2718938","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005971230","display_name":"Zhen Deng","orcid":"https://orcid.org/0000-0002-0240-0919"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Zhen Deng","raw_affiliation_strings":["TAMS Group, Informatics, University of Hamburg, Hamburg, Germany"],"raw_orcid":"https://orcid.org/0000-0002-0240-0919","affiliations":[{"raw_affiliation_string":"TAMS Group, Informatics, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013710926","display_name":"Haojun Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Haojun Guan","raw_affiliation_strings":["TAMS Group, Informatics, University of Hamburg, Hamburg, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TAMS Group, Informatics, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007231606","display_name":"Rui Huang","orcid":"https://orcid.org/0000-0001-7231-5042"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Huang","raw_affiliation_strings":["Center for Robotics, School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Robotics, School of Automation Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053592210","display_name":"Hongzhuo Liang","orcid":"https://orcid.org/0000-0002-6870-9898"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hongzhuo Liang","raw_affiliation_strings":["TAMS Group, Informatics, University of Hamburg, Hamburg, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TAMS Group, Informatics, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370820","display_name":"Liwei Zhang","orcid":"https://orcid.org/0000-0003-3083-9002"},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liwei Zhang","raw_affiliation_strings":["School of Mechanical Engineering and Automation, Fuzhou University, Fujian, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering and Automation, Fuzhou University, Fujian, China","institution_ids":["https://openalex.org/I80947539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100326967","display_name":"Jianwei Zhang","orcid":"https://orcid.org/0000-0002-5491-1745"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jianwei Zhang","raw_affiliation_strings":["TAMS Group, Informatics, University of Hamburg, Hamburg, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TAMS Group, Informatics, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5005971230"],"corresponding_institution_ids":["https://openalex.org/I159176309"],"apc_list":null,"apc_paid":null,"fwci":2.0786,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.90097231,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"11","issue":"1","first_page":"26","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.833707332611084},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7428067326545715},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.6945580244064331},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6245386600494385},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6058019995689392},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.602664589881897},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5856812000274658},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5480917692184448},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.5400510430335999},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.5077901482582092},{"id":"https://openalex.org/keywords/inductive-transfer","display_name":"Inductive transfer","score":0.4856316149234772},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4774092435836792},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4579126238822937},{"id":"https://openalex.org/keywords/instance-based-learning","display_name":"Instance-based learning","score":0.41101136803627014},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.28048646450042725},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.10913246870040894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.833707332611084},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7428067326545715},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.6945580244064331},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6245386600494385},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6058019995689392},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.602664589881897},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5856812000274658},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5480917692184448},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.5400510430335999},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.5077901482582092},{"id":"https://openalex.org/C77075516","wikidata":"https://www.wikidata.org/wiki/Q6027324","display_name":"Inductive transfer","level":5,"score":0.4856316149234772},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4774092435836792},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4579126238822937},{"id":"https://openalex.org/C24138899","wikidata":"https://www.wikidata.org/wiki/Q17141258","display_name":"Instance-based learning","level":3,"score":0.41101136803627014},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.28048646450042725},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.10913246870040894},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcds.2017.2718938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2017.2718938","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W121023703","https://openalex.org/W778742492","https://openalex.org/W1516801383","https://openalex.org/W1587009522","https://openalex.org/W1746819321","https://openalex.org/W1929309940","https://openalex.org/W1972063518","https://openalex.org/W1986014385","https://openalex.org/W1998172110","https://openalex.org/W1999874108","https://openalex.org/W2012204020","https://openalex.org/W2012392077","https://openalex.org/W2042520683","https://openalex.org/W2069324174","https://openalex.org/W2088604406","https://openalex.org/W2097381042","https://openalex.org/W2104733512","https://openalex.org/W2105813220","https://openalex.org/W2111967991","https://openalex.org/W2117629901","https://openalex.org/W2123967136","https://openalex.org/W2126909264","https://openalex.org/W2133040789","https://openalex.org/W2139053308","https://openalex.org/W2140135625","https://openalex.org/W2143672530","https://openalex.org/W2144576818","https://openalex.org/W2161872510","https://openalex.org/W2165698076","https://openalex.org/W2167117957","https://openalex.org/W2168342951","https://openalex.org/W2524771588","https://openalex.org/W2533806771","https://openalex.org/W2559960928","https://openalex.org/W2593707696","https://openalex.org/W2964068416","https://openalex.org/W4211049957","https://openalex.org/W4214717370","https://openalex.org/W6604963999","https://openalex.org/W6653435097","https://openalex.org/W6674600207","https://openalex.org/W6675999342","https://openalex.org/W6676327744","https://openalex.org/W6676560026","https://openalex.org/W6678157427","https://openalex.org/W6679818365","https://openalex.org/W6680657880","https://openalex.org/W6681154564","https://openalex.org/W6728185193","https://openalex.org/W6734433544"],"related_works":["https://openalex.org/W2171128163","https://openalex.org/W4239202700","https://openalex.org/W2155431098","https://openalex.org/W3112076360","https://openalex.org/W2634239194","https://openalex.org/W2094878326","https://openalex.org/W2043387613","https://openalex.org/W2896220732","https://openalex.org/W4200584423","https://openalex.org/W4205569898"],"abstract_inverted_index":{"Learning":[0],"skills":[1,94],"autonomously":[2],"is":[3,15,33,70,110,131,148,165],"a":[4,46,61,144,169],"particularly":[5],"important":[6],"ability":[7],"for":[8,97,121],"an":[9],"autonomous":[10],"robot.":[11],"A":[12],"promising":[13],"approach":[14],"reinforcement":[16],"learning":[17,63,141,153,157],"(RL)":[18],"where":[19],"agents":[20],"learn":[21],"policy":[22,79,98],"through":[23],"interaction":[24],"with":[25,67],"its":[26],"environment.":[27],"One":[28],"problem":[29],"of":[30,59,155],"RL":[31,66],"algorithm":[32],"how":[34],"to":[35,49,55,112,119,137,150,167,174],"tradeoff":[36],"the":[37,57,92,106,114,134,139,152,175],"exploration":[38,164],"and":[39,85,100],"exploitation.":[40],"Moreover,":[41],"multiple":[42],"tasks":[43],"also":[44],"make":[45],"great":[47],"challenge":[48],"robot":[50,122],"learning.":[51,102],"In":[52,89,103],"this":[53,90,156],"paper,":[54],"enhance":[56],"performance":[58,154],"RL,":[60],"novel":[62],"framework":[64],"integrating":[65],"knowledge":[68,126,129,177,180],"transfer":[69,130],"proposed.":[71],"Three":[72],"basic":[73],"components":[74],"are":[75,95],"included:":[76],"1)":[77],"probability":[78],"reuse;":[80],"2)":[81],"dynamic":[82,101],"model":[83],"learning;":[84],"3)":[86],"model-based":[87,104,135],"Q-learning.":[88],"framework,":[91],"prelearned":[93],"leveraged":[96],"reuse":[99],"Q-learning,":[105],"Gaussian":[107],"process":[108],"regression":[109],"used":[111],"approximate":[113],"Q-value":[115],"function":[116],"so":[117],"as":[118],"suit":[120],"control.":[123],"The":[124],"prior":[125,176],"retrieved":[127],"from":[128,179],"integrated":[132],"into":[133],"Q-learning":[136],"reduce":[138],"needed":[140,166],"time.":[142],"Finally,":[143],"human-robot":[145],"handover":[146],"experiment":[147],"performed":[149],"evaluate":[151],"framework.":[158],"Experiment":[159],"results":[160],"show":[161],"that":[162],"fewer":[163],"obtain":[168],"high":[170],"expected":[171],"reward,":[172],"due":[173],"obtained":[178],"transfer.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
