{"id":"https://openalex.org/W2986925736","doi":"https://doi.org/10.1109/tcyb.2019.2949596","title":"Task-Oriented Deep Reinforcement Learning for Robotic Skill Acquisition and Control","display_name":"Task-Oriented Deep Reinforcement Learning for Robotic Skill Acquisition and Control","publication_year":2019,"publication_date":"2019-11-12","ids":{"openalex":"https://openalex.org/W2986925736","doi":"https://doi.org/10.1109/tcyb.2019.2949596","mag":"2986925736","pmid":"https://pubmed.ncbi.nlm.nih.gov/31725408"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2019.2949596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2019.2949596","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031150449","display_name":"Guofei Xiang","orcid":"https://orcid.org/0000-0003-4934-4237"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guofei Xiang","raw_affiliation_strings":["Key Laboratory of System Control and Information Processing, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of System Control and Information Processing, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084573070","display_name":"Jianbo Su","orcid":"https://orcid.org/0000-0001-6931-5842"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianbo Su","raw_affiliation_strings":["Key Laboratory of System Control and Information Processing, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of System Control and Information Processing, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5031150449"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":3.3272,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.94015074,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"51","issue":"2","first_page":"1056","last_page":"1069"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8748202323913574},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7401803731918335},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6670377254486084},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6333376169204712},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5776107907295227},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5534371733665466},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5305631160736084},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.48067888617515564},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.44415083527565},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43822258710861206},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.43371888995170593},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1562829613685608},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.11684203147888184}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8748202323913574},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7401803731918335},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6670377254486084},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6333376169204712},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5776107907295227},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5534371733665466},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5305631160736084},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.48067888617515564},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.44415083527565},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43822258710861206},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.43371888995170593},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1562829613685608},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.11684203147888184},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2019.2949596","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2019.2949596","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:31725408","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31725408","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3902858495","display_name":null,"funder_award_id":"91748120","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G635920217","display_name":null,"funder_award_id":"61521063","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7937147596","display_name":null,"funder_award_id":"61533012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":95,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W112666333","https://openalex.org/W1522301498","https://openalex.org/W1737105075","https://openalex.org/W1771410628","https://openalex.org/W1850531616","https://openalex.org/W1923344279","https://openalex.org/W1931877416","https://openalex.org/W1975463331","https://openalex.org/W1977655452","https://openalex.org/W1986014385","https://openalex.org/W1999874108","https://openalex.org/W2012587148","https://openalex.org/W2061562262","https://openalex.org/W2083954950","https://openalex.org/W2098774185","https://openalex.org/W2099471712","https://openalex.org/W2121863487","https://openalex.org/W2134491302","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2155968351","https://openalex.org/W2158349948","https://openalex.org/W2158782408","https://openalex.org/W2169498096","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2342662072","https://openalex.org/W2343568200","https://openalex.org/W2417786368","https://openalex.org/W2593044849","https://openalex.org/W2594103415","https://openalex.org/W2594640072","https://openalex.org/W2596982695","https://openalex.org/W2604382266","https://openalex.org/W2738778707","https://openalex.org/W2746553466","https://openalex.org/W2754517384","https://openalex.org/W2770884134","https://openalex.org/W2781726626","https://openalex.org/W2785962646","https://openalex.org/W2788862220","https://openalex.org/W2794908222","https://openalex.org/W2910474428","https://openalex.org/W2911087563","https://openalex.org/W2919115771","https://openalex.org/W2949561945","https://openalex.org/W2950735232","https://openalex.org/W2951507724","https://openalex.org/W2962736495","https://openalex.org/W2962957031","https://openalex.org/W2963099939","https://openalex.org/W2963120839","https://openalex.org/W2963221965","https://openalex.org/W2963277051","https://openalex.org/W2963328631","https://openalex.org/W2963376229","https://openalex.org/W2963639957","https://openalex.org/W2963641140","https://openalex.org/W2963674921","https://openalex.org/W2963713397","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2964161785","https://openalex.org/W2968340082","https://openalex.org/W3101780148","https://openalex.org/W4205513846","https://openalex.org/W4297664295","https://openalex.org/W4320013936","https://openalex.org/W6604566608","https://openalex.org/W6631190155","https://openalex.org/W6637650041","https://openalex.org/W6638018090","https://openalex.org/W6639056794","https://openalex.org/W6640174482","https://openalex.org/W6674884181","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6704571135","https://openalex.org/W6715102896","https://openalex.org/W6716474083","https://openalex.org/W6718092244","https://openalex.org/W6734205138","https://openalex.org/W6734206676","https://openalex.org/W6734517396","https://openalex.org/W6735033012","https://openalex.org/W6738261575","https://openalex.org/W6744123322","https://openalex.org/W6746462176","https://openalex.org/W6747473740","https://openalex.org/W6748645729","https://openalex.org/W6755230031","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W17155033"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,5],"(RL)":[2],"and":[3,22,28,63,82,170,177],"imitation":[4],"(IL),":[6],"especially":[7],"equipped":[8],"with":[9,71,142,175],"deep":[10,146],"neural":[11],"networks,":[12],"have":[13],"been":[14],"widely":[15],"studied":[16],"for":[17,59,203],"autonomous":[18],"robotic":[19,60,125],"skill":[20,61],"acquisition":[21,62],"control":[23],"tasks.":[24],"However,":[25],"these":[26,197],"methods":[27,198],"their":[29],"extensions":[30],"require":[31],"extensive":[32],"environmental":[33],"interactions":[34],"during":[35],"training,":[36],"which":[37,76],"greatly":[38],"prevents":[39],"them":[40],"from":[41],"being":[42],"applied":[43],"to":[44,186,195,199],"real-world":[45],"robots.":[46,205],"To":[47],"alleviate":[48],"this":[49,87],"problem,":[50],"we":[51],"present":[52],"an":[53],"efficient":[54],"model-free":[55],"off-policy":[56],"actor-critic":[57],"algorithm":[58,162],"continuous":[64],"control,":[65],"by":[66,79,137],"fusing":[67],"the":[68,89,93,104,109,130,143,157,160],"task":[69],"reward":[70],"a":[72,191],"task-oriented":[73],"guiding":[74],"reward,":[75,179],"is":[77],"formulated":[78],"leveraging":[80],"few":[81],"imperfect":[83],"expert":[84],"demonstrations.":[85],"In":[86],"framework,":[88],"agent":[90,105],"can":[91,100,106,117,133],"explore":[92],"environment":[94],"more":[95,111],"intentionally,":[96],"thus":[97],"sampling":[98,168],"efficiency":[99,169],"be":[101,118],"achieved;":[102],"moreover,":[103],"also":[107],"exploit":[108],"experience":[110],"effectively,":[112],"thereby":[113],"substantially":[114],"improved":[115],"performance":[116,152,172],"realized":[119],"simultaneously.":[120],"The":[121],"empirical":[122],"results":[123],"on":[124,173],"locomotion":[126],"tasks":[127,174],"show":[128],"that":[129,155],"proposed":[131,161],"scheme":[132],"lower":[134],"sample":[135],"complexity":[136],"2-10":[138],"times":[139],"in":[140,166],"contrast":[141],"state-of-the-art":[144],"baseline":[145,182],"RL":[147],"(DRL)":[148],"algorithms,":[149],"while":[150],"achieving":[151],"better":[153],"than":[154],"of":[156],"expert.":[158],"Furthermore,":[159],"achieves":[163],"significant":[164],"improvement":[165],"both":[167],"asymptotic":[171],"sparse":[176],"delayed":[178],"wherein":[180],"those":[181],"DRL":[183],"algorithms":[184],"struggle":[185],"make":[187],"progress.":[188],"This":[189],"takes":[190],"substantial":[192],"step":[193],"forward":[194],"implement":[196],"acquire":[200],"skills":[201],"autonomously":[202],"real":[204]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":5}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
