{"id":"https://openalex.org/W3097787172","doi":"https://doi.org/10.1109/icnsc48988.2020.9238129","title":"An Overview of Robust Reinforcement Learning","display_name":"An Overview of Robust Reinforcement Learning","publication_year":2020,"publication_date":"2020-10-30","ids":{"openalex":"https://openalex.org/W3097787172","doi":"https://doi.org/10.1109/icnsc48988.2020.9238129","mag":"3097787172"},"language":"en","primary_location":{"id":"doi:10.1109/icnsc48988.2020.9238129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icnsc48988.2020.9238129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Networking, Sensing and Control (ICNSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100655819","display_name":"Shiyu Chen","orcid":"https://orcid.org/0000-0002-7157-4402"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiyu Chen","raw_affiliation_strings":["School of Mechanical Engineering and Automation, Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering and Automation, Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100677178","display_name":"Yanjie Li","orcid":"https://orcid.org/0000-0001-7890-9677"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjie Li","raw_affiliation_strings":["School of Mechanical Engineering and Automation, Harbin Institute of Technology, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering and Automation, Harbin Institute of Technology, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100655819"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.7954,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.7887502,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8765852451324463},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7545889019966125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5627559423446655},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5389493107795715},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5322520732879639},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5158066749572754},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4957638680934906},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46949532628059387},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.4694884419441223},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4340674579143524},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3322998285293579},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11307400465011597},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.099175363779068}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8765852451324463},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7545889019966125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5627559423446655},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5389493107795715},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5322520732879639},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5158066749572754},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4957638680934906},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46949532628059387},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.4694884419441223},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4340674579143524},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3322998285293579},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11307400465011597},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.099175363779068},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icnsc48988.2020.9238129","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icnsc48988.2020.9238129","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Networking, Sensing and Control (ICNSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W114517082","https://openalex.org/W1484551447","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1949804828","https://openalex.org/W1965878388","https://openalex.org/W1977655452","https://openalex.org/W2004220372","https://openalex.org/W2100110221","https://openalex.org/W2105078254","https://openalex.org/W2108734173","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2136503687","https://openalex.org/W2140135625","https://openalex.org/W2145339207","https://openalex.org/W2145341417","https://openalex.org/W2165150801","https://openalex.org/W2165622730","https://openalex.org/W2168565265","https://openalex.org/W2169186423","https://openalex.org/W2169209873","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2541678333","https://openalex.org/W2553297237","https://openalex.org/W2580909119","https://openalex.org/W2583993537","https://openalex.org/W2602963933","https://openalex.org/W2606508169","https://openalex.org/W2727840223","https://openalex.org/W2736601468","https://openalex.org/W2754930636","https://openalex.org/W2761873684","https://openalex.org/W2781585732","https://openalex.org/W2793745738","https://openalex.org/W2917175941","https://openalex.org/W2918394844","https://openalex.org/W2919115771","https://openalex.org/W2920362155","https://openalex.org/W2950300520","https://openalex.org/W2952867909","https://openalex.org/W2962959294","https://openalex.org/W2962977206","https://openalex.org/W2963477884","https://openalex.org/W2963796870","https://openalex.org/W2963864421","https://openalex.org/W2964291307","https://openalex.org/W2966684444","https://openalex.org/W3100944043","https://openalex.org/W4288481378","https://openalex.org/W4288567475","https://openalex.org/W4298023569","https://openalex.org/W4298857966","https://openalex.org/W4302570325","https://openalex.org/W6638018090","https://openalex.org/W6674961657","https://openalex.org/W6680139899","https://openalex.org/W6680657880","https://openalex.org/W6681267953","https://openalex.org/W6684205842","https://openalex.org/W6684892534","https://openalex.org/W6685331716","https://openalex.org/W6735677848","https://openalex.org/W6744838376"],"related_works":["https://openalex.org/W3074294383","https://openalex.org/W4233452137","https://openalex.org/W4206669594","https://openalex.org/W2961085424","https://openalex.org/W2959276766","https://openalex.org/W4254857216","https://openalex.org/W405964254","https://openalex.org/W4295941380","https://openalex.org/W2045758229","https://openalex.org/W4319083788"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"is":[3,25,58,108,120,140,156,161,166],"one":[4],"of":[5,18,23,31,66,91,112,136,173,198,207,233,249],"the":[6,16,32,37,67,71,79,83,89,92,97,113,117,123,127,134,137,159,163,171,174,199,205,208,223,230,234,244,269],"popular":[7],"methods":[8],"for":[9,48,195,252],"intelligent":[10],"control":[11,274],"and":[12,41,53,63,86,105,247,265],"decision":[13],"making":[14],"in":[15,78,96,116,146,170,182,211,225,237,262,275],"field":[17],"robotics":[19],"recently.":[20],"The":[21,100],"goal":[22],"RL":[24,49,57,107,185,251,264],"to":[26,69,75,81,168,179,187,202,222,268],"learn":[27,188],"an":[28,154,220],"optimal":[29,128,164,191,209],"policy":[30,129,165,192,210],"agent":[33,68,124,160],"by":[34,60,158],"interacting":[35],"with":[36,150,219],"environment":[38,80,119,149,175],"via":[39],"trail":[40],"error.":[42],"There":[43],"are":[44],"two":[45],"main":[46,101],"algorithms":[47,224],"problems,":[50],"including":[51],"model-free":[52,106],"model-based":[54,104],"methods.":[55],"Model-free":[56],"driven":[59],"historical":[61,144],"trajectories":[62],"empirical":[64],"data":[65,85,145],"optimize":[70],"policy,":[72],"which":[73,176],"needs":[74],"take":[76],"actions":[77],"collect":[82],"trajectory":[84],"may":[87],"cause":[88],"damage":[90],"robot":[93,253,273],"during":[94],"training":[95],"real":[98],"environment.":[99],"different":[102],"between":[103],"that":[109,162,193],"a":[110,147,189],"model":[111,135,172,196,231],"transition":[114,138,200,235],"probability":[115,139,201,236],"interaction":[118],"employed.":[121],"Thus":[122],"can":[125,177],"search":[126],"through":[130],"internal":[131],"simulation.":[132],"However,":[133],"usually":[141],"estimated":[142],"from":[143],"single":[148],"statistical":[151],"errors.":[152],"Therefore,":[153],"issue":[155],"faced":[157],"sensitive":[167],"perturbations":[169],"lead":[178],"serious":[180],"degradation":[181],"performance.":[183],"Robust":[184],"aims":[186],"robust":[190,238,250,263],"accounts":[194],"uncertainty":[197,232],"systematically":[203],"mitigate":[204],"sensitivity":[206],"perturbed":[212],"environments.":[213,277],"In":[214,240],"this":[215],"overview,":[216],"we":[217,242,257],"begin":[218],"introduction":[221],"RL,":[226],"then":[227],"focus":[228],"on":[229],"RL.":[239],"parallel,":[241],"highlight":[243],"current":[245],"research":[246,260],"challenges":[248],"control.":[254],"To":[255],"conclude,":[256],"describe":[258],"some":[259],"areas":[261],"look":[266],"ahead":[267],"future":[270],"work":[271],"about":[272],"complex":[276]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
