{"id":"https://openalex.org/W4221110788","doi":"https://doi.org/10.3390/make4010013","title":"Robust Reinforcement Learning: A Review of Foundations and Recent Advances","display_name":"Robust Reinforcement Learning: A Review of Foundations and Recent Advances","publication_year":2022,"publication_date":"2022-03-19","ids":{"openalex":"https://openalex.org/W4221110788","doi":"https://doi.org/10.3390/make4010013"},"language":"en","primary_location":{"id":"doi:10.3390/make4010013","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make4010013","pdf_url":"https://www.mdpi.com/2504-4990/4/1/13/pdf?version=1647850689","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/4/1/13/pdf?version=1647850689","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051916137","display_name":"Janosch Moos","orcid":"https://orcid.org/0000-0003-2484-3830"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Janosch Moos","raw_affiliation_strings":["Institute for Mechatronic Systems in Mechanical Engineering, Technical University of Darmstadt, 64287 Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0003-2484-3830","affiliations":[{"raw_affiliation_string":"Institute for Mechatronic Systems in Mechanical Engineering, Technical University of Darmstadt, 64287 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000980093","display_name":"Kay Hansel","orcid":"https://orcid.org/0000-0002-8448-4510"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Kay Hansel","raw_affiliation_strings":["Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0002-8448-4510","affiliations":[{"raw_affiliation_string":"Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080021372","display_name":"Hany Abdulsamad","orcid":"https://orcid.org/0000-0001-8683-8784"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hany Abdulsamad","raw_affiliation_strings":["Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010822957","display_name":"Svenja Stark","orcid":null},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Svenja Stark","raw_affiliation_strings":["Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055577582","display_name":"Debora Clever","orcid":"https://orcid.org/0000-0002-0542-3833"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]},{"id":"https://openalex.org/I4210166982","display_name":"ABB (Germany)","ror":"https://ror.org/05yeg2858","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210166982","https://openalex.org/I885143765"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Debora Clever","raw_affiliation_strings":["ABB AG, 68309 Mannheim, Germany","Institute for Mechatronic Systems in Mechanical Engineering, Technical University of Darmstadt, 64287 Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ABB AG, 68309 Mannheim, Germany","institution_ids":["https://openalex.org/I4210166982"]},{"raw_affiliation_string":"Institute for Mechatronic Systems in Mechanical Engineering, Technical University of Darmstadt, 64287 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Peters","raw_affiliation_strings":["Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intelligent Autonomous Systems in Computer Science, Technical University of Darmstadt, 64289 Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5000980093","https://openalex.org/A5051916137"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":16.8965,"has_fulltext":true,"cited_by_count":112,"citation_normalized_percentile":{"value":0.99570829,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"4","issue":"1","first_page":"276","last_page":"315"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11807","display_name":"Infrastructure Resilience and Vulnerability Analysis","score":0.944100022315979,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8362669944763184},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.746950089931488},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6720919609069824},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6450098752975464},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.608207106590271},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6028057336807251},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4514341950416565},{"id":"https://openalex.org/keywords/robust-control","display_name":"Robust control","score":0.4307185411453247},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4274660348892212},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4240267276763916},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35860440135002136},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.34312376379966736},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16900676488876343},{"id":"https://openalex.org/keywords/control-system","display_name":"Control system","score":0.13590127229690552},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13169679045677185},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08353888988494873}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8362669944763184},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.746950089931488},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6720919609069824},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6450098752975464},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.608207106590271},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6028057336807251},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4514341950416565},{"id":"https://openalex.org/C31531917","wikidata":"https://www.wikidata.org/wiki/Q915157","display_name":"Robust control","level":3,"score":0.4307185411453247},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4274660348892212},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4240267276763916},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35860440135002136},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.34312376379966736},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16900676488876343},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.13590127229690552},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13169679045677185},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08353888988494873},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.3390/make4010013","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make4010013","pdf_url":"https://www.mdpi.com/2504-4990/4/1/13/pdf?version=1647850689","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:tuprints.ulb.tu-darmstadt.de:21118","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401590","display_name":"Technischen Universit\u00e4t Darmstadt","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:06312d15438b463db5951628332ac3c8","is_oa":false,"landing_page_url":"https://doaj.org/article/06312d15438b463db5951628332ac3c8","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 4, Iss 1, Pp 276-315 (2022)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2504-4990/4/1/13/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/make4010013","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction; Volume 4; Issue 1; Pages: 276-315","raw_type":"Text"},{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:132125","is_oa":false,"landing_page_url":"http://tubiblio.ulb.tu-darmstadt.de/132125/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Artikel"},{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:147327","is_oa":false,"landing_page_url":"http://tubiblio.ulb.tu-darmstadt.de/147327/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Artikel"},{"id":"doi:10.26083/tuprints-00021118","is_oa":true,"landing_page_url":"https://doi.org/10.26083/tuprints-00021118","pdf_url":null,"source":{"id":"https://openalex.org/S7407051655","display_name":"TUprints","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.3390/make4010013","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make4010013","pdf_url":"https://www.mdpi.com/2504-4990/4/1/13/pdf?version=1647850689","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.8100000023841858,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323384","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4221110788.pdf","grobid_xml":"https://content.openalex.org/works/W4221110788.grobid-xml"},"referenced_works_count":122,"referenced_works":["https://openalex.org/W134786152","https://openalex.org/W206679605","https://openalex.org/W1499669280","https://openalex.org/W1542941925","https://openalex.org/W1579184372","https://openalex.org/W1587799944","https://openalex.org/W1590693676","https://openalex.org/W1626977535","https://openalex.org/W1687873425","https://openalex.org/W1835254890","https://openalex.org/W1892947258","https://openalex.org/W1896074376","https://openalex.org/W1949984268","https://openalex.org/W1965878388","https://openalex.org/W1968315580","https://openalex.org/W1973039793","https://openalex.org/W1980516134","https://openalex.org/W1993377828","https://openalex.org/W1999918998","https://openalex.org/W2006093909","https://openalex.org/W2019095063","https://openalex.org/W2027591506","https://openalex.org/W2028145673","https://openalex.org/W2036931518","https://openalex.org/W2041357003","https://openalex.org/W2042459810","https://openalex.org/W2045817059","https://openalex.org/W2047901468","https://openalex.org/W2055300044","https://openalex.org/W2055504162","https://openalex.org/W2058935279","https://openalex.org/W2069045459","https://openalex.org/W2069918405","https://openalex.org/W2076337359","https://openalex.org/W2081920433","https://openalex.org/W2088413745","https://openalex.org/W2089105401","https://openalex.org/W2089415692","https://openalex.org/W2100072100","https://openalex.org/W2100110221","https://openalex.org/W2100827217","https://openalex.org/W2104602264","https://openalex.org/W2105934661","https://openalex.org/W2106929622","https://openalex.org/W2109640889","https://openalex.org/W2113151258","https://openalex.org/W2113501460","https://openalex.org/W2114682338","https://openalex.org/W2119717200","https://openalex.org/W2123651102","https://openalex.org/W2125417745","https://openalex.org/W2133354309","https://openalex.org/W2136503687","https://openalex.org/W2145341417","https://openalex.org/W2155027007","https://openalex.org/W2155153696","https://openalex.org/W2158782408","https://openalex.org/W2165622730","https://openalex.org/W2167433878","https://openalex.org/W2168359464","https://openalex.org/W2168565265","https://openalex.org/W2172968643","https://openalex.org/W2173261491","https://openalex.org/W2180612164","https://openalex.org/W2257979135","https://openalex.org/W2330525833","https://openalex.org/W2395575420","https://openalex.org/W2498694880","https://openalex.org/W2553297237","https://openalex.org/W2562747313","https://openalex.org/W2575731723","https://openalex.org/W2603766943","https://openalex.org/W2617547828","https://openalex.org/W2623431351","https://openalex.org/W2746553466","https://openalex.org/W2750605955","https://openalex.org/W2773525213","https://openalex.org/W2773691349","https://openalex.org/W2781726626","https://openalex.org/W2794643322","https://openalex.org/W2905838893","https://openalex.org/W2913597562","https://openalex.org/W2914994663","https://openalex.org/W2921673526","https://openalex.org/W2945586853","https://openalex.org/W2946606218","https://openalex.org/W2949963197","https://openalex.org/W2962850106","https://openalex.org/W2963448658","https://openalex.org/W2963684914","https://openalex.org/W2967292964","https://openalex.org/W2982312374","https://openalex.org/W2999905431","https://openalex.org/W3001311638","https://openalex.org/W3003521509","https://openalex.org/W3035578948","https://openalex.org/W3045875028","https://openalex.org/W3091444389","https://openalex.org/W3093099641","https://openalex.org/W3110979110","https://openalex.org/W3118932025","https://openalex.org/W3130610232","https://openalex.org/W3156295478","https://openalex.org/W3174652807","https://openalex.org/W4234761190","https://openalex.org/W4242001891","https://openalex.org/W4243620103","https://openalex.org/W4250589301","https://openalex.org/W4251616545","https://openalex.org/W4252284432","https://openalex.org/W4287755265","https://openalex.org/W6634952046","https://openalex.org/W6636868823","https://openalex.org/W6638018090","https://openalex.org/W6669402789","https://openalex.org/W6679257226","https://openalex.org/W6682729792","https://openalex.org/W6735677848","https://openalex.org/W6767327128","https://openalex.org/W6780383567","https://openalex.org/W6784152626","https://openalex.org/W7062612498"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2768698792","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W1985560493","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W3168977894"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,10,62,170],"(RL)":[2],"has":[3],"become":[4],"a":[5,137],"highly":[6],"successful":[7],"framework":[8],"for":[9,76],"in":[11,22,50,67,78,99],"Markov":[12],"decision":[13],"processes":[14],"(MDP).":[15],"Due":[16],"to":[17,44,60,96,150,167],"the":[18,51,55,79,84,100,110,124,129,141,146,151,165],"adoption":[19],"of":[20,34,109,128,132,140,148],"RL":[21,35,39,155],"realistic":[23],"and":[24,63,153,171],"complex":[25],"environments,":[26],"solution":[27],"robustness":[28,43,149],"becomes":[29],"an":[30,114],"increasingly":[31],"important":[32],"aspect":[33,139],"deployment.":[36],"Nevertheless,":[37],"current":[38],"algorithms":[40],"struggle":[41],"with":[42],"uncertainty,":[45],"disturbances,":[46],"or":[47,122],"structural":[48],"changes":[49],"environment.":[52],"We":[53],"survey":[54,159],"literature":[56],"on":[57],"robust":[58,73,91,105,119,134,168],"approaches":[59,166],"reinforcement":[61,169],"categorize":[64],"these":[65,133],"methods":[66],"four":[68],"different":[69,138],"ways:":[70],"(i)":[71],"Transition":[72],"designs":[74,92,106,120,135],"account":[75],"uncertainties":[77],"system":[80,101,111,126],"dynamics":[81],"by":[82,112],"manipulating":[83],"transition":[85],"probabilities":[86],"between":[87],"states;":[88],"(ii)":[89],"Disturbance":[90],"leverage":[93],"external":[94],"forces":[95],"model":[97],"uncertainty":[98],"behavior;":[102],"(iii)":[103],"Action":[104],"redirect":[107],"transitions":[108],"corrupting":[113],"agent\u2019s":[115],"output;":[116],"(iv)":[117],"Observation":[118],"exploit":[121],"distort":[123],"perceived":[125],"state":[127],"policy.":[130],"Each":[131],"alters":[136],"MDP.":[142],"Additionally,":[143],"we":[144],"address":[145],"connection":[147],"risk-based":[152],"entropy-regularized":[154],"formulations.":[156],"The":[157],"resulting":[158],"covers":[160],"all":[161],"fundamental":[162],"concepts":[163],"underlying":[164],"their":[172],"recent":[173],"advances.":[174]},"counts_by_year":[{"year":2026,"cited_by_count":12},{"year":2025,"cited_by_count":41},{"year":2024,"cited_by_count":32},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
