{"id":"https://openalex.org/W4416251613","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228330","title":"Robust Meta Reinforcement Learning via Environment Context Enhancement","display_name":"Robust Meta Reinforcement Learning via Environment Context Enhancement","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251613","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228330"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101253103","display_name":"Wenning Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenning Hu","raw_affiliation_strings":["Harbin Engineering University,College of Computer Science and Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,College of Computer Science and Technology,Harbin,China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100332096","display_name":"Hongbin Wang","orcid":"https://orcid.org/0000-0003-2176-2998"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongbin Wang","raw_affiliation_strings":["Harbin Engineering University,College of Computer Science and Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,College of Computer Science and Technology,Harbin,China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101558156","display_name":"Xirui Chen","orcid":"https://orcid.org/0000-0001-7867-2040"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xirui Chen","raw_affiliation_strings":["Harbin Engineering University,College of Computer Science and Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,College of Computer Science and Technology,Harbin,China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031640535","display_name":"Nianbin Wang","orcid":"https://orcid.org/0000-0003-1738-7937"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nianbin Wang","raw_affiliation_strings":["Harbin Engineering University,College of Computer Science and Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,College of Computer Science and Technology,Harbin,China","institution_ids":["https://openalex.org/I151727225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101253103"],"corresponding_institution_ids":["https://openalex.org/I151727225"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19483242,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7443000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7443000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.034299999475479126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.019200000911951065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.703499972820282},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6769999861717224},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5511000156402588},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4828999936580658},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.421999990940094},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.40529999136924744},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.35519999265670776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.708899974822998},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.703499972820282},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6769999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5903000235557556},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5717999935150146},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5511000156402588},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4828999936580658},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.421999990940094},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.40529999136924744},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.35519999265670776},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.3264000117778778},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2791000008583069}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2158782408","https://openalex.org/W2168565265","https://openalex.org/W2951360122","https://openalex.org/W3175254947","https://openalex.org/W3176624977","https://openalex.org/W4206149971","https://openalex.org/W4221110788","https://openalex.org/W4221144359","https://openalex.org/W4283365039","https://openalex.org/W4379740050","https://openalex.org/W4386083023","https://openalex.org/W4386753570","https://openalex.org/W4393147287","https://openalex.org/W4393159835"],"related_works":[],"abstract_inverted_index":{"Meta-reinforcement":[0],"learning":[1,30],"with":[2],"contexts":[3],"enables":[4],"the":[5,18,21,24,29,56,67,90,93,98,113,122,125,131,139,145,168],"policy":[6],"to":[7,10,23,61,78,81,88,120,135,161],"quickly":[8],"adapt":[9],"new":[11],"tasks,":[12],"which":[13,42,102,129],"is":[14,142],"important":[15],"for":[16,108],"bridging":[17],"gap":[19],"between":[20],"simulation":[22],"real":[25],"uncertain":[26,85,183],"environment.":[27],"However,":[28],"of":[31,58,92,124,155],"traditional":[32],"context":[33,59,100,115,126,132],"encoders":[34,60,101],"relies":[35],"on":[36,159,171],"transition":[37],"data":[38,107],"collected":[39],"during":[40],"meta-training,":[41],"results":[43],"in":[44,84],"all":[45],"environments":[46,86],"being":[47],"mapped":[48],"into":[49],"a":[50,153],"single":[51],"encoding":[52],"space":[53],"and":[54,87,174],"reduces":[55],"sensitivity":[57,134],"environmental":[62,106,136,184],"uncertainties.":[63],"This":[64],"work":[65],"offers":[66],"Robust":[68],"Meta":[69],"Reinforcement":[70],"Learning":[71],"via":[72],"Environment":[73],"Context":[74],"Enhancement":[75],"(RMRL-CE)":[76],"algorithm":[77],"enhance":[79],"adaptability":[80],"abrupt":[82],"changes":[83],"augment":[89],"robustness":[91,179],"policy.":[94],"Firstly,":[95],"we":[96,111],"use":[97],"local-global":[99],"offer":[103],"more":[104],"precise":[105],"decision-making.":[109],"Subsequently,":[110],"train":[112],"global":[114],"encoder":[116],"by":[117],"intra-class":[118],"loss":[119],"simplify":[121],"segmentation":[123],"embedding":[127],"space,":[128],"improves":[130],"encoder\u2019s":[133],"uncertainty.":[137],"Finally,":[138],"comprehensive":[140],"meta-policy":[141],"developed":[143],"alongside":[144],"conditional":[146],"value-at-risk":[147],"(CVaR)":[148],"optimization":[149],"method.":[150],"We":[151],"used":[152],"set":[154],"sequential":[156],"control":[157],"tasks":[158],"MuJoCo":[160],"make":[162],"meta-test":[163],"tasks.":[164],"Our":[165],"method":[166],"outperformed":[167],"benchmark":[169],"methods":[170],"both":[172],"mean":[173],"CVaR":[175],"returns,":[176],"demonstrating":[177],"high":[178],"even":[180],"under":[181],"harsh":[182],"conditions.":[185]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
