{"id":"https://openalex.org/W4408067615","doi":"https://doi.org/10.1007/s40747-025-01785-0","title":"Demonstration and offset augmented meta reinforcement learning with sparse rewards","display_name":"Demonstration and offset augmented meta reinforcement learning with sparse rewards","publication_year":2025,"publication_date":"2025-02-28","ids":{"openalex":"https://openalex.org/W4408067615","doi":"https://doi.org/10.1007/s40747-025-01785-0"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-025-01785-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01785-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01785-0.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01785-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112982696","display_name":"Haorui Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haorui Li","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043670878","display_name":"Jiaqi Liang","orcid":"https://orcid.org/0000-0003-4977-4093"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqi Liang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100695922","display_name":"Xiaoxuan Wang","orcid":"https://orcid.org/0000-0003-3308-0759"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxuan Wang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110372401","display_name":"Chengzhi Jiang","orcid":"https://orcid.org/0009-0003-9708-2363"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengzhi Jiang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004618782","display_name":"Linjing Li","orcid":"https://orcid.org/0000-0002-8737-099X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linjing Li","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038521974","display_name":"Daniel Zeng","orcid":"https://orcid.org/0000-0002-9046-222X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daniel Zeng","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5112982696"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":2.6124,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88871479,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"11","issue":"4","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.6848204731941223},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.6424977779388428},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5926918983459473},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5405482649803162},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5376405119895935},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37818634510040283},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18689486384391785},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.0481281578540802}],"concepts":[{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.6848204731941223},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.6424977779388428},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5926918983459473},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5405482649803162},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5376405119895935},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37818634510040283},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18689486384391785},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0481281578540802},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-025-01785-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01785-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01785-0.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:edd807e971bf459fbc9c5f721e77cfa1","is_oa":true,"landing_page_url":"https://doaj.org/article/edd807e971bf459fbc9c5f721e77cfa1","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 11, Iss 4, Pp 1-20 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-025-01785-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01785-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01785-0.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1966413184","display_name":null,"funder_award_id":"72293575","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1997021681","display_name":null,"funder_award_id":"72293573","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408067615.pdf"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W2119717200","https://openalex.org/W2124352385","https://openalex.org/W2151083897","https://openalex.org/W2158782408","https://openalex.org/W2240536489","https://openalex.org/W2302255633","https://openalex.org/W2788862220","https://openalex.org/W2963099939","https://openalex.org/W2963411833","https://openalex.org/W3022659297","https://openalex.org/W3041394538","https://openalex.org/W3085832734","https://openalex.org/W3096831136","https://openalex.org/W3168573424","https://openalex.org/W4220747123","https://openalex.org/W4283822150","https://openalex.org/W4285782692","https://openalex.org/W4382318909","https://openalex.org/W4392151673","https://openalex.org/W4393160024","https://openalex.org/W4402816868","https://openalex.org/W6600137863","https://openalex.org/W6600653764","https://openalex.org/W6605730092"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"This":[0,68],"paper":[1],"introduces":[2,92],"DOAMRL,":[3],"a":[4,20,93,128,144],"novel":[5,129],"meta-reinforcement":[6],"learning":[7,45,49],"(meta-RL)":[8],"method":[9,18,71],"that":[10,134,183],"extends":[11],"the":[12,52,56,65,70,74,80,108,118,137,140,148],"Model-Agnostic":[13],"Meta-Learning":[14],"(MAML)":[15],"framework.":[16],"The":[17],"addresses":[19],"key":[21],"limitation":[22],"of":[23,55,77,83,88,120,139,147],"existing":[24,159,186],"meta-RL":[25],"approaches,":[26],"which":[27,97],"struggle":[28],"to":[29,34,64,72,106],"effectively":[30,42],"use":[31],"suboptimal":[32,121,172],"demonstrations":[33],"guide":[35,107],"training":[36,165],"in":[37,101,161,188],"sparse":[38,102],"reward":[39,103],"environments.":[40],"DOAMRL":[41,91,184],"combines":[43],"reinforcement":[44],"(RL)":[46],"and":[47,79,166,171,179,191],"imitation":[48],"(IL)":[50],"within":[51],"inner":[53],"loop":[54],"MAML":[57],"framework,":[58],"with":[59,112,158,175],"dynamically":[60],"adjusted":[61],"weights":[62],"applied":[63],"IL":[66,84],"component.":[67],"enables":[69],"leverage":[73],"exploration":[75,100],"strengths":[76],"RL":[78],"efficiency":[81],"benefits":[82],"at":[85],"different":[86],"stages":[87,163],"training.":[89],"Additionally,":[90],"meta-learned":[94],"parameter":[95],"offset,":[96],"enhances":[98],"targeted":[99],"settings,":[104],"helping":[105],"meta-policy":[109],"toward":[110],"regions":[111],"non-zero":[113],"rewards.":[114],"To":[115],"further":[116],"mitigate":[117],"impact":[119],"demonstration":[122,130],"data":[123,131],"on":[124],"meta-training,":[125],"we":[126],"propose":[127],"enhancement":[132],"module":[133],"iteratively":[135],"improves":[136],"quality":[138],"demonstrations.":[141],"We":[142],"provide":[143],"comprehensive":[145,156],"analysis":[146],"proposed":[149],"method,":[150],"justifying":[151],"its":[152],"design":[153],"choices.":[154],"A":[155],"comparison":[157],"methods":[160],"various":[162],"(including":[164],"adaptation),":[167],"using":[168],"both":[169],"optimal":[170],"demonstrations,":[173],"along":[174],"results":[176],"from":[177],"ablation":[178],"sensitivity":[180],"analysis,":[181],"demonstrates":[182],"outperforms":[185],"approaches":[187],"performance,":[189],"applicability,":[190],"robustness.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
