{"id":"https://openalex.org/W4380763481","doi":"https://doi.org/10.1109/tcds.2023.3286465","title":"Supervised Meta-Reinforcement Learning With Trajectory Optimization for Manipulation Tasks","display_name":"Supervised Meta-Reinforcement Learning With Trajectory Optimization for Manipulation Tasks","publication_year":2023,"publication_date":"2023-06-15","ids":{"openalex":"https://openalex.org/W4380763481","doi":"https://doi.org/10.1109/tcds.2023.3286465"},"language":"en","primary_location":{"id":"doi:10.1109/tcds.2023.3286465","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2023.3286465","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.ulster.ac.uk/ws/files/121932869/Supervised_Meta_Reinforcement_Learning_with_Trajectory_Optimization_for_Manipulation_Tasks.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009524384","display_name":"Lei Wang","orcid":"https://orcid.org/0000-0003-1101-3221"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Wang","raw_affiliation_strings":["Faculty of Robot Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Robot Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038287028","display_name":"Yunzhou Zhang","orcid":"https://orcid.org/0000-0003-0610-3732"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunzhou Zhang","raw_affiliation_strings":["College of Information Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072234330","display_name":"Delong Zhu","orcid":"https://orcid.org/0000-0002-1143-7860"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Delong Zhu","raw_affiliation_strings":["Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, SAR, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070227862","display_name":"Sonya Coleman","orcid":"https://orcid.org/0000-0002-4676-7640"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sonya Coleman","raw_affiliation_strings":["School of Computing, Engineering and Intelligent Systems, Ulster University, Coleraine, U.K"],"affiliations":[{"raw_affiliation_string":"School of Computing, Engineering and Intelligent Systems, Ulster University, Coleraine, U.K","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015138346","display_name":"Dermot Kerr","orcid":"https://orcid.org/0000-0002-5077-0658"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dermot Kerr","raw_affiliation_strings":["School of Computing, Engineering and Intelligent Systems, Ulster University, Coleraine, U.K"],"affiliations":[{"raw_affiliation_string":"School of Computing, Engineering and Intelligent Systems, Ulster University, Coleraine, U.K","institution_ids":["https://openalex.org/I138801177"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009524384"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":1.2098,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82910864,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"16","issue":"2","first_page":"681","last_page":"691"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8745545148849487},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7863670587539673},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.5939154624938965},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5847200155258179},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5637377500534058},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40195900201797485},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3802913427352905},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.27061086893081665}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8745545148849487},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7863670587539673},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.5939154624938965},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5847200155258179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5637377500534058},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40195900201797485},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3802913427352905},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.27061086893081665},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcds.2023.3286465","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2023.3286465","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/0e50a410-ec8e-410b-9224-68b504e6ab1f","is_oa":true,"landing_page_url":"https://pure.ulster.ac.uk/en/publications/0e50a410-ec8e-410b-9224-68b504e6ab1f","pdf_url":"https://pure.ulster.ac.uk/ws/files/121932869/Supervised_Meta_Reinforcement_Learning_with_Trajectory_Optimization_for_Manipulation_Tasks.pdf","source":{"id":"https://openalex.org/S4306402454","display_name":"Ulster University Research Portal (Ulster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138801177","host_organization_name":"University of Ulster","host_organization_lineage":["https://openalex.org/I138801177"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wang, L, Zhang, Y, Zhu, D, Coleman, S & Kerr, D 2023, 'Supervised Meta-Reinforcement Learning with Trajectory Optimization for Manipulation Tasks', IEEE Transactions on Cognitive and Developmental Systems, vol. 16, no. 2, pp. 681-691. https://doi.org/10.1109/tcds.2023.3286465","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:openaire/0e50a410-ec8e-410b-9224-68b504e6ab1f","is_oa":true,"landing_page_url":"https://pure.ulster.ac.uk/en/publications/0e50a410-ec8e-410b-9224-68b504e6ab1f","pdf_url":"https://pure.ulster.ac.uk/ws/files/121932869/Supervised_Meta_Reinforcement_Learning_with_Trajectory_Optimization_for_Manipulation_Tasks.pdf","source":{"id":"https://openalex.org/S4306402454","display_name":"Ulster University Research Portal (Ulster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138801177","host_organization_name":"University of Ulster","host_organization_lineage":["https://openalex.org/I138801177"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Wang, L, Zhang, Y, Zhu, D, Coleman, S & Kerr, D 2023, 'Supervised Meta-Reinforcement Learning with Trajectory Optimization for Manipulation Tasks', IEEE Transactions on Cognitive and Developmental Systems, vol. 16, no. 2, pp. 681-691. https://doi.org/10.1109/tcds.2023.3286465","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2007767059","display_name":null,"funder_award_id":"61471110","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4285396571","display_name":null,"funder_award_id":"61973066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380763481.pdf","grobid_xml":"https://content.openalex.org/works/W4380763481.grobid-xml"},"referenced_works_count":62,"referenced_works":["https://openalex.org/W1499669280","https://openalex.org/W1923344279","https://openalex.org/W2018705428","https://openalex.org/W2126909264","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2578206533","https://openalex.org/W2736601468","https://openalex.org/W2742093937","https://openalex.org/W2766447205","https://openalex.org/W2785397462","https://openalex.org/W2787501667","https://openalex.org/W2963864421","https://openalex.org/W2982316857","https://openalex.org/W2989847975","https://openalex.org/W2997756730","https://openalex.org/W2998381804","https://openalex.org/W3005581722","https://openalex.org/W3006182894","https://openalex.org/W3107113899","https://openalex.org/W3107951310","https://openalex.org/W3118210634","https://openalex.org/W3121078556","https://openalex.org/W3127561923","https://openalex.org/W4253187718","https://openalex.org/W4289388948","https://openalex.org/W4294646197","https://openalex.org/W4297943175","https://openalex.org/W4300971732","https://openalex.org/W6607786297","https://openalex.org/W6638018090","https://openalex.org/W6640290305","https://openalex.org/W6675999342","https://openalex.org/W6678367057","https://openalex.org/W6679524480","https://openalex.org/W6680657880","https://openalex.org/W6681631837","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6729433768","https://openalex.org/W6731982132","https://openalex.org/W6736057607","https://openalex.org/W6736742618","https://openalex.org/W6738279954","https://openalex.org/W6741002519","https://openalex.org/W6742288159","https://openalex.org/W6744181227","https://openalex.org/W6747473740","https://openalex.org/W6747625265","https://openalex.org/W6747943641","https://openalex.org/W6748317118","https://openalex.org/W6748600884","https://openalex.org/W6750254146","https://openalex.org/W6751869817","https://openalex.org/W6755476724","https://openalex.org/W6760698134","https://openalex.org/W6761008914","https://openalex.org/W6765478815","https://openalex.org/W6769596995","https://openalex.org/W6785535465","https://openalex.org/W6843564457"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W3130669838","https://openalex.org/W4294873804","https://openalex.org/W4383109125","https://openalex.org/W2091347716","https://openalex.org/W98577079","https://openalex.org/W4301772239","https://openalex.org/W2891227010","https://openalex.org/W4319309271","https://openalex.org/W2548988175"],"abstract_inverted_index":{"Learning":[0],"from":[1,41,60,174],"small":[2],"amounts":[3,66],"of":[4,67,143,202,207],"samples":[5,59,69,108],"with":[6,83,216],"reinforcement":[7],"learning":[8,117,153],"(RL)":[9],"is":[10,100,127],"challenging":[11],"in":[12,16,139,146,154,163],"many":[13],"tasks,":[14,210],"especially,":[15],"real-world":[17],"applications,":[18],"such":[19],"as":[20,27],"robotics.":[21],"Meta-RL":[22],"(meta-RL)":[23],"has":[24],"been":[25],"proposed":[26,221],"an":[28],"approach":[29,99,222],"to":[30,36,102,134,170,184,189,198],"address":[31],"this":[32,73,179],"problem":[33],"by":[34,51,87],"generalizing":[35],"new":[37],"tasks":[38],"through":[39,109],"experience":[40],"previous":[42,218],"similar":[43],"tasks.":[44],"However,":[45],"these":[46],"approaches":[47],"generally":[48],"perform":[49],"meta-optimization":[50,120],"focusing":[52],"direct":[53],"policy":[54,123],"search":[55],"methods":[56],"on":[57],"validation":[58,107],"adapted":[61],"policies,":[62],"thus,":[63],"requiring":[64],"large":[65],"on-policy":[68],"during":[70],"meta-training.":[71],"To":[72],"end,":[74],"we":[75,181,211],"propose":[76],"a":[77,129,205],"novel":[78],"algorithm":[79],"called":[80],"supervised":[81],"meta-RL":[82],"trajectory":[84,96,111,176,187],"optimization":[85,112,131,188],"(SMRL-TO)":[86],"integrating":[88],"model-agnostic":[89],"meta-learning":[90],"(MAML)":[91],"and":[92,113,150,193,228],"iterative":[93],"LQR":[94],"(iLQR)-based":[95],"optimization.":[97,177],"Our":[98],"designed":[101],"provide":[103],"online":[104,151],"supervision":[105,173],"for":[106],"iLQR-based":[110,175,186],"embed":[114],"simple":[115],"imitation":[116,152],"into":[118],"the":[119,147,155,171,200,217,220],"rather":[121],"than":[122],"gradient":[124,137],"steps.":[125],"This":[126],"actually":[128],"bi-level":[130],"that":[132,214],"needs":[133],"calculate":[135],"several":[136],"updates":[138],"each":[140],"meta-iteration,":[141],"consisting":[142],"off-policy":[144],"RL":[145],"inner":[148],"loop":[149],"outer":[156],"loop.":[157],"SMRL-TO":[158],"can":[159,223],"achieve":[160,229],"significant":[161],"improvements":[162],"sample":[164,226],"efficiency":[165,227],"without":[166],"human-provided":[167],"demonstrations,":[168],"due":[169],"effective":[172],"In":[178],"article,":[180],"describe":[182],"how":[183,195],"use":[185],"obtain":[190],"labeled":[191],"data":[192],"then":[194],"leverage":[196],"them":[197],"assist":[199],"training":[201],"meta-learner.":[203],"Through":[204],"series":[206],"robotic":[208],"manipulation":[209],"further":[212],"show":[213],"compared":[215],"methods,":[219],"substantially":[224],"improve":[225],"better":[230],"asymptotic":[231],"performance.":[232]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
